Coverage for drivers/cleanup.py : 34%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/python3
2#
3# Copyright (C) Citrix Systems Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; version 2.1 only.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Script to coalesce and garbage collect COW-based SR's in the background
19#
21from sm_typing import Any, Optional, List, override
23import os
24import os.path
25import sys
26import time
27import signal
28import subprocess
29import getopt
30import datetime
31import traceback
32import base64
33import zlib
34import errno
35import stat
37import XenAPI # pylint: disable=import-error
38import util
39import lvutil
40import lvmcache
41import journaler
42import fjournaler
43import lock
44import blktap2
45import xs_errors
46from refcounter import RefCounter
47from ipc import IPCFlag
48from lvmanager import LVActivator
49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
50from functools import reduce
51from time import monotonic as _time
53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
54from cowutil import CowImageInfo, CowUtil, getCowUtil
55from lvmcowutil import LV_PREFIX, LvmCowUtil
56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION
58try:
59 from linstorcowutil import LinstorCowUtil
60 from linstorjournaler import LinstorJournaler
61 from linstorvolumemanager import get_controller_uri
62 from linstorvolumemanager import LinstorVolumeManager
63 from linstorvolumemanager import LinstorVolumeManagerError
64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
66 LINSTOR_AVAILABLE = True
67except ImportError:
68 LINSTOR_AVAILABLE = False
70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
71# possible due to lvhd_stop_using_() not working correctly. However, we leave
72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
73# record for use by the offline tool (which makes the operation safe by pausing
74# the VM first)
75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True
77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce
79# process "lock", used simply as an indicator that a process already exists
80# that is doing GC/coalesce on this SR (such a process holds the lock, and we
81# check for the fact by trying the lock).
82lockGCRunning = None
84# process "lock" to indicate that the GC process has been activated but may not
85# yet be running, stops a second process from being started.
86LOCK_TYPE_GC_ACTIVE = "gc_active"
87lockGCActive = None
89# Default coalesce error rate limit, in messages per minute. A zero value
90# disables throttling, and a negative value disables error reporting.
91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
93COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
95VAR_RUN = "/var/run/"
96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"
98N_RUNNING_AVERAGE = 10
100NON_PERSISTENT_DIR = '/run/nonpersistent/sm'
102# Signal Handler
103SIGTERM = False
106class AbortException(util.SMException):
107 pass
110def receiveSignal(signalNumber, frame):
111 global SIGTERM
113 util.SMlog("GC: recieved SIGTERM")
114 SIGTERM = True
115 return
118################################################################################
119#
120# Util
121#
122class Util:
123 RET_RC = 1
124 RET_STDOUT = 2
125 RET_STDERR = 4
127 UUID_LEN = 36
129 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}
131 @staticmethod
132 def log(text) -> None:
133 util.SMlog(text, ident="SMGC")
135 @staticmethod
136 def logException(tag):
137 info = sys.exc_info()
138 if info[0] == SystemExit: 138 ↛ 140line 138 didn't jump to line 140, because the condition on line 138 was never true
139 # this should not be happening when catching "Exception", but it is
140 sys.exit(0)
141 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
142 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
143 Util.log(" ***********************")
144 Util.log(" * E X C E P T I O N *")
145 Util.log(" ***********************")
146 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
147 Util.log(tb)
148 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
150 @staticmethod
151 def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
152 "Execute a subprocess, then return its return code, stdout, stderr"
153 proc = subprocess.Popen(args,
154 stdin=subprocess.PIPE, \
155 stdout=subprocess.PIPE, \
156 stderr=subprocess.PIPE, \
157 shell=True, \
158 close_fds=True)
159 (stdout, stderr) = proc.communicate(inputtext)
160 stdout = str(stdout)
161 stderr = str(stderr)
162 rc = proc.returncode
163 if log:
164 Util.log("`%s`: %s" % (args, rc))
165 if type(expectedRC) != type([]):
166 expectedRC = [expectedRC]
167 if not rc in expectedRC:
168 reason = stderr.strip()
169 if stdout.strip():
170 reason = "%s (stdout: %s)" % (reason, stdout.strip())
171 Util.log("Failed: %s" % reason)
172 raise util.CommandException(rc, args, reason)
174 if ret == Util.RET_RC:
175 return rc
176 if ret == Util.RET_STDERR:
177 return stderr
178 return stdout
180 @staticmethod
181 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
182 """execute func in a separate thread and kill it if abortTest signals
183 so"""
184 abortSignaled = abortTest() # check now before we clear resultFlag
185 resultFlag = IPCFlag(ns)
186 resultFlag.clearAll()
187 pid = os.fork()
188 if pid:
189 startTime = _time()
190 try:
191 while True:
192 if resultFlag.test("success"):
193 Util.log(" Child process completed successfully")
194 resultFlag.clear("success")
195 return
196 if resultFlag.test("failure"):
197 resultFlag.clear("failure")
198 raise util.SMException("Child process exited with error")
199 if abortTest() or abortSignaled or SIGTERM:
200 os.killpg(pid, signal.SIGKILL)
201 raise AbortException("Aborting due to signal")
202 if timeOut and _time() - startTime > timeOut:
203 os.killpg(pid, signal.SIGKILL)
204 resultFlag.clearAll()
205 raise util.SMException("Timed out")
206 time.sleep(pollInterval)
207 finally:
208 wait_pid = 0
209 rc = -1
210 count = 0
211 while wait_pid == 0 and count < 10:
212 wait_pid, rc = os.waitpid(pid, os.WNOHANG)
213 if wait_pid == 0:
214 time.sleep(2)
215 count += 1
217 if wait_pid == 0:
218 Util.log("runAbortable: wait for process completion timed out")
219 else:
220 os.setpgrp()
221 try:
222 if func() == ret:
223 resultFlag.set("success")
224 else:
225 resultFlag.set("failure")
226 except Exception as e:
227 Util.log("Child process failed with : (%s)" % e)
228 resultFlag.set("failure")
229 Util.logException("This exception has occured")
230 os._exit(0)
232 @staticmethod
233 def num2str(number):
234 for prefix in ("G", "M", "K"): 234 ↛ 237line 234 didn't jump to line 237, because the loop on line 234 didn't complete
235 if number >= Util.PREFIX[prefix]:
236 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
237 return "%s" % number
239 @staticmethod
240 def numBits(val):
241 count = 0
242 while val:
243 count += val & 1
244 val = val >> 1
245 return count
247 @staticmethod
248 def countBits(bitmap1, bitmap2):
249 """return bit count in the bitmap produced by ORing the two bitmaps"""
250 len1 = len(bitmap1)
251 len2 = len(bitmap2)
252 lenLong = len1
253 lenShort = len2
254 bitmapLong = bitmap1
255 if len2 > len1:
256 lenLong = len2
257 lenShort = len1
258 bitmapLong = bitmap2
260 count = 0
261 for i in range(lenShort):
262 val = bitmap1[i] | bitmap2[i]
263 count += Util.numBits(val)
265 for i in range(i + 1, lenLong):
266 val = bitmapLong[i]
267 count += Util.numBits(val)
268 return count
270 @staticmethod
271 def getThisScript():
272 thisScript = util.get_real_path(__file__)
273 if thisScript.endswith(".pyc"):
274 thisScript = thisScript[:-1]
275 return thisScript
278################################################################################
279#
280# XAPI
281#
282class XAPI:
283 USER = "root"
284 PLUGIN_ON_SLAVE = "on-slave"
286 CONFIG_SM = 0
287 CONFIG_OTHER = 1
288 CONFIG_ON_BOOT = 2
289 CONFIG_ALLOW_CACHING = 3
291 CONFIG_NAME = {
292 CONFIG_SM: "sm-config",
293 CONFIG_OTHER: "other-config",
294 CONFIG_ON_BOOT: "on-boot",
295 CONFIG_ALLOW_CACHING: "allow_caching"
296 }
298 class LookupError(util.SMException):
299 pass
301 @staticmethod
302 def getSession():
303 session = XenAPI.xapi_local()
304 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
305 return session
307 def __init__(self, session, srUuid):
308 self.sessionPrivate = False
309 self.session = session
310 if self.session is None:
311 self.session = self.getSession()
312 self.sessionPrivate = True
313 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
314 self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
315 self.hostUuid = util.get_this_host()
316 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
317 self.task = None
318 self.task_progress = {"coalescable": 0, "done": 0}
320 def __del__(self):
321 if self.sessionPrivate:
322 self.session.xenapi.session.logout()
324 @property
325 def srRef(self):
326 return self._srRef
328 def isPluggedHere(self):
329 pbds = self.getAttachedPBDs()
330 for pbdRec in pbds:
331 if pbdRec["host"] == self._hostRef:
332 return True
333 return False
335 def poolOK(self):
336 host_recs = self.session.xenapi.host.get_all_records()
337 for host_ref, host_rec in host_recs.items():
338 if not host_rec["enabled"]:
339 Util.log("Host %s not enabled" % host_rec["uuid"])
340 return False
341 return True
343 def isMaster(self):
344 if self.srRecord["shared"]:
345 pool = list(self.session.xenapi.pool.get_all_records().values())[0]
346 return pool["master"] == self._hostRef
347 else:
348 pbds = self.getAttachedPBDs()
349 if len(pbds) < 1:
350 raise util.SMException("Local SR not attached")
351 elif len(pbds) > 1:
352 raise util.SMException("Local SR multiply attached")
353 return pbds[0]["host"] == self._hostRef
355 def getAttachedPBDs(self):
356 """Return PBD records for all PBDs of this SR that are currently
357 attached"""
358 attachedPBDs = []
359 pbds = self.session.xenapi.PBD.get_all_records()
360 for pbdRec in pbds.values():
361 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
362 attachedPBDs.append(pbdRec)
363 return attachedPBDs
365 def getOnlineHosts(self):
366 return util.get_online_hosts(self.session)
368 def ensureInactive(self, hostRef, args):
369 text = self.session.xenapi.host.call_plugin( \
370 hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
371 Util.log("call-plugin returned: '%s'" % text)
373 def getRecordHost(self, hostRef):
374 return self.session.xenapi.host.get_record(hostRef)
376 def _getRefVDI(self, uuid):
377 return self.session.xenapi.VDI.get_by_uuid(uuid)
379 def getRefVDI(self, vdi):
380 return self._getRefVDI(vdi.uuid)
382 def getRecordVDI(self, uuid):
383 try:
384 ref = self._getRefVDI(uuid)
385 return self.session.xenapi.VDI.get_record(ref)
386 except XenAPI.Failure:
387 return None
389 def singleSnapshotVDI(self, vdi):
390 return self.session.xenapi.VDI.snapshot(vdi.getRef(),
391 {"type": "internal"})
393 def forgetVDI(self, srUuid, vdiUuid):
394 """Forget the VDI, but handle the case where the VDI has already been
395 forgotten (i.e. ignore errors)"""
396 try:
397 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
398 self.session.xenapi.VDI.forget(vdiRef)
399 except XenAPI.Failure:
400 pass
402 def getConfigVDI(self, vdi, key):
403 kind = vdi.CONFIG_TYPE[key]
404 if kind == self.CONFIG_SM:
405 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
406 elif kind == self.CONFIG_OTHER:
407 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
408 elif kind == self.CONFIG_ON_BOOT:
409 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
410 elif kind == self.CONFIG_ALLOW_CACHING:
411 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
412 else:
413 assert(False)
414 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
415 return cfg
417 def removeFromConfigVDI(self, vdi, key):
418 kind = vdi.CONFIG_TYPE[key]
419 if kind == self.CONFIG_SM:
420 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
421 elif kind == self.CONFIG_OTHER:
422 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
423 else:
424 assert(False)
426 def addToConfigVDI(self, vdi, key, val):
427 kind = vdi.CONFIG_TYPE[key]
428 if kind == self.CONFIG_SM:
429 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
430 elif kind == self.CONFIG_OTHER:
431 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
432 else:
433 assert(False)
435 def isSnapshot(self, vdi):
436 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())
438 def markCacheSRsDirty(self):
439 sr_refs = self.session.xenapi.SR.get_all_records_where( \
440 'field "local_cache_enabled" = "true"')
441 for sr_ref in sr_refs:
442 Util.log("Marking SR %s dirty" % sr_ref)
443 util.set_dirty(self.session, sr_ref)
445 def srUpdate(self):
446 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
447 abortFlag = IPCFlag(self.srRecord["uuid"])
448 task = self.session.xenapi.Async.SR.update(self._srRef)
449 cancelTask = True
450 try:
451 for i in range(60):
452 status = self.session.xenapi.task.get_status(task)
453 if not status == "pending":
454 Util.log("SR.update_asynch status changed to [%s]" % status)
455 cancelTask = False
456 return
457 if abortFlag.test(FLAG_TYPE_ABORT):
458 Util.log("Abort signalled during srUpdate, cancelling task...")
459 try:
460 self.session.xenapi.task.cancel(task)
461 cancelTask = False
462 Util.log("Task cancelled")
463 except:
464 pass
465 return
466 time.sleep(1)
467 finally:
468 if cancelTask:
469 self.session.xenapi.task.cancel(task)
470 self.session.xenapi.task.destroy(task)
471 Util.log("Asynch srUpdate still running, but timeout exceeded.")
473 def update_task(self):
474 self.session.xenapi.task.set_other_config(
475 self.task,
476 {
477 "applies_to": self._srRef
478 })
479 total = self.task_progress['coalescable'] + self.task_progress['done']
480 if (total > 0):
481 self.session.xenapi.task.set_progress(
482 self.task, float(self.task_progress['done']) / total)
484 def create_task(self, label, description):
485 self.task = self.session.xenapi.task.create(label, description)
486 self.update_task()
488 def update_task_progress(self, key, value):
489 self.task_progress[key] = value
490 if self.task:
491 self.update_task()
493 def set_task_status(self, status):
494 if self.task:
495 self.session.xenapi.task.set_status(self.task, status)
498################################################################################
499#
500# VDI
501#
502class VDI(object):
503 """Object representing a VDI of a COW-based SR"""
505 POLL_INTERVAL = 1
506 POLL_TIMEOUT = 30
507 DEVICE_MAJOR = 202
509 # config keys & values
510 DB_VDI_PARENT = "vhd-parent"
511 DB_VDI_TYPE = "vdi_type"
512 DB_VDI_BLOCKS = "vhd-blocks"
513 DB_VDI_PAUSED = "paused"
514 DB_VDI_RELINKING = "relinking"
515 DB_VDI_ACTIVATING = "activating"
516 DB_GC = "gc"
517 DB_COALESCE = "coalesce"
518 DB_LEAFCLSC = "leaf-coalesce" # config key
519 DB_GC_NO_SPACE = "gc_no_space"
520 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce
521 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce
522 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means
523 # no space to snap-coalesce or unable to keep
524 # up with VDI. This is not used by the SM, it
525 # might be used by external components.
526 DB_ONBOOT = "on-boot"
527 ONBOOT_RESET = "reset"
528 DB_ALLOW_CACHING = "allow_caching"
530 CONFIG_TYPE = {
531 DB_VDI_PARENT: XAPI.CONFIG_SM,
532 DB_VDI_TYPE: XAPI.CONFIG_SM,
533 DB_VDI_BLOCKS: XAPI.CONFIG_SM,
534 DB_VDI_PAUSED: XAPI.CONFIG_SM,
535 DB_VDI_RELINKING: XAPI.CONFIG_SM,
536 DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
537 DB_GC: XAPI.CONFIG_OTHER,
538 DB_COALESCE: XAPI.CONFIG_OTHER,
539 DB_LEAFCLSC: XAPI.CONFIG_OTHER,
540 DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
541 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
542 DB_GC_NO_SPACE: XAPI.CONFIG_SM
543 }
545 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes
546 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds
547 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating
548 # feasibility of leaf coalesce
550 JRN_RELINK = "relink" # journal entry type for relinking children
551 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced
552 JRN_LEAF = "leaf" # used in coalesce-leaf
554 STR_TREE_INDENT = 4
556 def __init__(self, sr, uuid, vdi_type):
557 self.sr = sr
558 self.scanError = True
559 self.uuid = uuid
560 self.vdi_type = vdi_type
561 self.fileName = ""
562 self.parentUuid = ""
563 self.sizeVirt = -1
564 self._sizePhys = -1
565 self._sizeAllocated = -1
566 self._hidden = False
567 self.parent = None
568 self.children = []
569 self._vdiRef = None
570 self.cowutil = getCowUtil(vdi_type)
571 self._clearRef()
573 @staticmethod
574 def extractUuid(path):
575 raise NotImplementedError("Implement in sub class")
577 def load(self, info=None) -> None:
578 """Load VDI info"""
579 pass
581 def getDriverName(self) -> str:
582 return self.vdi_type
584 def getRef(self):
585 if self._vdiRef is None:
586 self._vdiRef = self.sr.xapi.getRefVDI(self)
587 return self._vdiRef
589 def getConfig(self, key, default=None):
590 config = self.sr.xapi.getConfigVDI(self, key)
591 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 591 ↛ 592line 591 didn't jump to line 592, because the condition on line 591 was never true
592 val = config
593 else:
594 val = config.get(key)
595 if val:
596 return val
597 return default
599 def setConfig(self, key, val):
600 self.sr.xapi.removeFromConfigVDI(self, key)
601 self.sr.xapi.addToConfigVDI(self, key, val)
602 Util.log("Set %s = %s for %s" % (key, val, self))
604 def delConfig(self, key):
605 self.sr.xapi.removeFromConfigVDI(self, key)
606 Util.log("Removed %s from %s" % (key, self))
608 def ensureUnpaused(self):
609 if self.getConfig(self.DB_VDI_PAUSED) == "true":
610 Util.log("Unpausing VDI %s" % self)
611 self.unpause()
613 def pause(self, failfast=False) -> None:
614 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
615 self.uuid, failfast):
616 raise util.SMException("Failed to pause VDI %s" % self)
618 def _report_tapdisk_unpause_error(self):
619 try:
620 xapi = self.sr.xapi.session.xenapi
621 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
622 msg_name = "failed to unpause tapdisk"
623 msg_body = "Failed to unpause tapdisk for VDI %s, " \
624 "VMs using this tapdisk have lost access " \
625 "to the corresponding disk(s)" % self.uuid
626 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
627 except Exception as e:
628 util.SMlog("failed to generate message: %s" % e)
630 def unpause(self):
631 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
632 self.uuid):
633 self._report_tapdisk_unpause_error()
634 raise util.SMException("Failed to unpause VDI %s" % self)
636 def refresh(self, ignoreNonexistent=True):
637 """Pause-unpause in one step"""
638 self.sr.lock()
639 try:
640 try:
641 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 641 ↛ 643line 641 didn't jump to line 643, because the condition on line 641 was never true
642 self.sr.uuid, self.uuid):
643 self._report_tapdisk_unpause_error()
644 raise util.SMException("Failed to refresh %s" % self)
645 except XenAPI.Failure as e:
646 if util.isInvalidVDI(e) and ignoreNonexistent:
647 Util.log("VDI %s not found, ignoring" % self)
648 return
649 raise
650 finally:
651 self.sr.unlock()
653 def isSnapshot(self):
654 return self.sr.xapi.isSnapshot(self)
656 def isAttachedRW(self):
657 return util.is_attached_rw(
658 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
660 def getVDIBlocks(self):
661 val = self.updateBlockInfo()
662 bitmap = zlib.decompress(base64.b64decode(val))
663 return bitmap
665 def isCoalesceable(self):
666 """A VDI is coalesceable if it has no siblings and is not a leaf"""
667 return not self.scanError and \
668 self.parent and \
669 len(self.parent.children) == 1 and \
670 self.isHidden() and \
671 len(self.children) > 0
673 def isLeafCoalesceable(self):
674 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
675 return not self.scanError and \
676 self.parent and \
677 len(self.parent.children) == 1 and \
678 not self.isHidden() and \
679 len(self.children) == 0
681 def canLiveCoalesce(self, speed):
682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
683 isLeafCoalesceable() already"""
684 feasibleSize = False
685 allowedDownTime = \
686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
687 allocated_size = self.getAllocatedSize()
688 if speed:
689 feasibleSize = \
690 allocated_size // speed < allowedDownTime
691 else:
692 feasibleSize = \
693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
695 return (feasibleSize or
696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
698 def getAllPrunable(self):
699 if len(self.children) == 0: # base case
700 # it is possible to have a hidden leaf that was recently coalesced
701 # onto its parent, its children already relinked but not yet
702 # reloaded - in which case it may not be garbage collected yet:
703 # some tapdisks could still be using the file.
704 if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
705 return []
706 if not self.scanError and self.isHidden():
707 return [self]
708 return []
710 thisPrunable = True
711 vdiList = []
712 for child in self.children:
713 childList = child.getAllPrunable()
714 vdiList.extend(childList)
715 if child not in childList:
716 thisPrunable = False
718 # We can destroy the current VDI if all childs are hidden BUT the
719 # current VDI must be hidden too to do that!
720 # Example in this case (after a failed live leaf coalesce):
721 #
722 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
723 # SMGC: [32436] b5458d61(1.000G/4.127M)
724 # SMGC: [32436] *OLD_b545(1.000G/4.129M)
725 #
726 # OLD_b545 is hidden and must be removed, but b5458d61 not.
727 # Normally we are not in this function when the delete action is
728 # executed but in `_liveLeafCoalesce`.
730 if not self.scanError and not self.isHidden() and thisPrunable:
731 vdiList.append(self)
732 return vdiList
734 def getSizePhys(self) -> int:
735 return self._sizePhys
737 def getAllocatedSize(self) -> int:
738 return self._sizeAllocated
740 def getTreeRoot(self):
741 "Get the root of the tree that self belongs to"
742 root = self
743 while root.parent:
744 root = root.parent
745 return root
747 def getTreeHeight(self):
748 "Get the height of the subtree rooted at self"
749 if len(self.children) == 0:
750 return 1
752 maxChildHeight = 0
753 for child in self.children:
754 childHeight = child.getTreeHeight()
755 if childHeight > maxChildHeight:
756 maxChildHeight = childHeight
758 return maxChildHeight + 1
760 def getAllLeaves(self) -> List["VDI"]:
761 "Get all leaf nodes in the subtree rooted at self"
762 if len(self.children) == 0:
763 return [self]
765 leaves = []
766 for child in self.children:
767 leaves.extend(child.getAllLeaves())
768 return leaves
770 def updateBlockInfo(self) -> Optional[str]:
771 val = base64.b64encode(self._queryCowBlocks()).decode()
772 try:
773 self.setConfig(VDI.DB_VDI_BLOCKS, val)
774 except Exception:
775 if self.vdi_type != VdiType.QCOW2:
776 raise
777 # Sometime with QCOW2, our allocation table is too big to be stored in XAPI, in this case we do not store it
778 # and we write `skipped` instead so that hasWork is happy (and the GC doesn't run in loop indefinitely).
779 self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")
781 return val
783 def rename(self, uuid) -> None:
784 "Rename the VDI file"
785 assert(not self.sr.vdis.get(uuid))
786 self._clearRef()
787 oldUuid = self.uuid
788 self.uuid = uuid
789 self.children = []
790 # updating the children themselves is the responsibility of the caller
791 del self.sr.vdis[oldUuid]
792 self.sr.vdis[self.uuid] = self
794 def delete(self) -> None:
795 "Physically delete the VDI"
796 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
797 lock.Lock.cleanupAll(self.uuid)
798 self._clear()
800 def getParent(self) -> str:
801 return self.cowutil.getParent(self.path, lambda x: x.strip()) 801 ↛ exitline 801 didn't run the lambda on line 801
803 def repair(self, parent) -> None:
804 self.cowutil.repair(parent)
806 @override
807 def __str__(self) -> str:
808 strHidden = ""
809 if self.isHidden(): 809 ↛ 810line 809 didn't jump to line 810, because the condition on line 809 was never true
810 strHidden = "*"
811 strSizeVirt = "?"
812 if self.sizeVirt > 0: 812 ↛ 813line 812 didn't jump to line 813, because the condition on line 812 was never true
813 strSizeVirt = Util.num2str(self.sizeVirt)
814 strSizePhys = "?"
815 if self._sizePhys > 0: 815 ↛ 816line 815 didn't jump to line 816, because the condition on line 815 was never true
816 strSizePhys = "/%s" % Util.num2str(self._sizePhys)
817 strSizeAllocated = "?"
818 if self._sizeAllocated >= 0:
819 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
820 strType = "[{}]".format(self.vdi_type)
822 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
823 strSizePhys, strSizeAllocated, strType)
825 def validate(self, fast=False) -> None:
826 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success: 826 ↛ 827line 826 didn't jump to line 827, because the condition on line 826 was never true
827 raise util.SMException("COW image %s corrupted" % self)
829 def _clear(self):
830 self.uuid = ""
831 self.path = ""
832 self.parentUuid = ""
833 self.parent = None
834 self._clearRef()
836 def _clearRef(self):
837 self._vdiRef = None
839 def _call_plug_cancel(self, hostRef):
840 args = {"path": self.path, "vdi_type": self.vdi_type}
841 self.sr.xapi.session.xenapi.host.call_plugin( \
842 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
844 def _call_plugin_coalesce(self, hostRef):
845 args = {"path": self.path, "vdi_type": self.vdi_type}
846 util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
847 ret = self.sr.xapi.session.xenapi.host.call_plugin( \
848 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
849 util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
851 def _doCoalesceOnHost(self, hostRef):
852 self.validate()
853 self.parent.validate(True)
854 self.parent._increaseSizeVirt(self.sizeVirt)
855 self.sr._updateSlavesOnResize(self.parent)
856 #TODO: We might need to make the LV RW on the slave directly for coalesce?
857 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them
859 def abortTest():
860 file = self.sr._gc_running_file(self)
861 try:
862 with open(file, "r") as f:
863 if not f.read():
864 #TODO: Need to call commit cancel on the hostRef if we stop
865 util.SMlog("DAMS: Cancelling")
866 self._call_plug_cancel(hostRef)
867 return True
868 except OSError as e:
869 if e.errno == errno.ENOENT:
870 util.SMlog("File {} does not exist".format(file))
871 else:
872 util.SMlog("IOError: {}".format(e))
873 return True
874 return False
876 #TODO: Add exception handling here like when callinng in a runAbortable situation_doCoalesceCOWImage
877 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
878 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
880 self.parent.validate(True)
881 #self._verifyContents(0)
882 self.parent.updateBlockInfo()
884 def _isOpenOnHosts(self) -> Optional[str]:
885 for pbdRecord in self.sr.xapi.getAttachedPBDs():
886 hostRef = pbdRecord["host"]
887 args = {"path": self.path}
888 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
889 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
890 if is_openers:
891 return hostRef
892 return None
894 def _doCoalesce(self) -> None:
895 """Coalesce self onto parent. Only perform the actual coalescing of
896 an image, but not the subsequent relinking. We'll do that as the next step,
897 after reloading the entire SR in case things have changed while we
898 were coalescing"""
899 self.validate()
900 self.parent.validate(True)
901 self.parent._increaseSizeVirt(self.sizeVirt)
902 self.sr._updateSlavesOnResize(self.parent)
903 self._coalesceCowImage(0)
904 self.parent.validate(True)
905 #self._verifyContents(0)
906 self.parent.updateBlockInfo()
908 def _verifyContents(self, timeOut):
909 Util.log(" Coalesce verification on %s" % self)
910 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
911 Util.runAbortable(lambda: self._runTapdiskDiff(), True,
912 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
913 Util.log(" Coalesce verification succeeded")
915 def _runTapdiskDiff(self):
916 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
917 (self.getDriverName(), self.path, \
918 self.parent.getDriverName(), self.parent.path)
919 Util.doexec(cmd, 0)
920 return True
922 @staticmethod
923 def _reportCoalesceError(vdi, ce):
924 """Reports a coalesce error to XenCenter.
926 vdi: the VDI object on which the coalesce error occured
927 ce: the CommandException that was raised"""
929 msg_name = os.strerror(ce.code)
930 if ce.code == errno.ENOSPC:
931 # TODO We could add more information here, e.g. exactly how much
932 # space is required for the particular coalesce, as well as actions
933 # to be taken by the user and consequences of not taking these
934 # actions.
935 msg_body = 'Run out of space while coalescing.'
936 elif ce.code == errno.EIO:
937 msg_body = 'I/O error while coalescing.'
938 else:
939 msg_body = ''
940 util.SMlog('Coalesce failed on SR %s: %s (%s)'
941 % (vdi.sr.uuid, msg_name, msg_body))
943 # Create a XenCenter message, but don't spam.
944 xapi = vdi.sr.xapi.session.xenapi
945 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
946 oth_cfg = xapi.SR.get_other_config(sr_ref)
947 if COALESCE_ERR_RATE_TAG in oth_cfg:
948 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
949 else:
950 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
952 xcmsg = False
953 if coalesce_err_rate == 0:
954 xcmsg = True
955 elif coalesce_err_rate > 0:
956 now = datetime.datetime.now()
957 sm_cfg = xapi.SR.get_sm_config(sr_ref)
958 if COALESCE_LAST_ERR_TAG in sm_cfg:
959 # seconds per message (minimum distance in time between two
960 # messages in seconds)
961 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
962 last = datetime.datetime.fromtimestamp(
963 float(sm_cfg[COALESCE_LAST_ERR_TAG]))
964 if now - last >= spm:
965 xapi.SR.remove_from_sm_config(sr_ref,
966 COALESCE_LAST_ERR_TAG)
967 xcmsg = True
968 else:
969 xcmsg = True
970 if xcmsg:
971 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
972 str(now.strftime('%s')))
973 if xcmsg:
974 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
976 def coalesce(self) -> int:
977 return self.cowutil.coalesce(self.path)
979 @staticmethod
980 def _doCoalesceCowImage(vdi: "VDI"):
981 try:
982 startTime = time.time()
983 allocated_size = vdi.getAllocatedSize()
984 coalesced_size = vdi.coalesce()
985 endTime = time.time()
986 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
987 except util.CommandException as ce:
988 # We use try/except for the following piece of code because it runs
989 # in a separate process context and errors will not be caught and
990 # reported by anyone.
991 try:
992 # Report coalesce errors back to user via XC
993 VDI._reportCoalesceError(vdi, ce)
994 except Exception as e:
995 util.SMlog('failed to create XenCenter message: %s' % e)
996 raise ce
997 except:
998 raise
1000 def _vdi_is_raw(self, vdi_path):
1001 """
1002 Given path to vdi determine if it is raw
1003 """
1004 uuid = self.extractUuid(vdi_path)
1005 return self.sr.vdis[uuid].vdi_type == VdiType.RAW
1007 def _coalesceCowImage(self, timeOut):
1008 Util.log(" Running COW coalesce on %s" % self)
1009 def abortTest():
1010 if self.cowutil.isCoalesceableOnRemote():
1011 file = self.sr._gc_running_file(self)
1012 try:
1013 with open(file, "r") as f:
1014 if not f.read():
1015 return True
1016 except OSError as e:
1017 if e.errno == errno.ENOENT:
1018 util.SMlog("File {} does not exist".format(file))
1019 else:
1020 util.SMlog("IOError: {}".format(e))
1021 return True
1022 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1024 try:
1025 util.fistpoint.activate_custom_fn(
1026 "cleanup_coalesceVHD_inject_failure",
1027 util.inject_failure)
1028 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
1029 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
1030 except:
1031 # Exception at this phase could indicate a failure in COW coalesce
1032 # or a kill of COW coalesce by runAbortable due to timeOut
1033 # Try a repair and reraise the exception
1034 parent = ""
1035 try:
1036 parent = self.getParent()
1037 if not self._vdi_is_raw(parent):
1038 # Repair error is logged and ignored. Error reraised later
1039 util.SMlog('Coalesce failed on %s, attempting repair on ' \
1040 'parent %s' % (self.uuid, parent))
1041 self.repair(parent)
1042 except Exception as e:
1043 util.SMlog('(error ignored) Failed to repair parent %s ' \
1044 'after failed coalesce on %s, err: %s' %
1045 (parent, self.path, e))
1046 raise
1048 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
1050 def _relinkSkip(self) -> None:
1051 """Relink children of this VDI to point to the parent of this VDI"""
1052 abortFlag = IPCFlag(self.sr.uuid)
1053 for child in self.children:
1054 if abortFlag.test(FLAG_TYPE_ABORT): 1054 ↛ 1055line 1054 didn't jump to line 1055, because the condition on line 1054 was never true
1055 raise AbortException("Aborting due to signal")
1056 Util.log(" Relinking %s from %s to %s" % \
1057 (child, self, self.parent))
1058 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
1059 child._setParent(self.parent)
1060 self.children = []
1062 def _reloadChildren(self, vdiSkip):
1063 """Pause & unpause all VDIs in the subtree to cause blktap to reload
1064 the COW image metadata for this file in any online VDI"""
1065 abortFlag = IPCFlag(self.sr.uuid)
1066 for child in self.children:
1067 if child == vdiSkip:
1068 continue
1069 if abortFlag.test(FLAG_TYPE_ABORT): 1069 ↛ 1070line 1069 didn't jump to line 1070, because the condition on line 1069 was never true
1070 raise AbortException("Aborting due to signal")
1071 Util.log(" Reloading VDI %s" % child)
1072 child._reload()
1074 def _reload(self):
1075 """Pause & unpause to cause blktap to reload the image metadata"""
1076 for child in self.children: 1076 ↛ 1077line 1076 didn't jump to line 1077, because the loop on line 1076 never started
1077 child._reload()
1079 # only leaves can be attached
1080 if len(self.children) == 0: 1080 ↛ exitline 1080 didn't return from function '_reload', because the condition on line 1080 was never false
1081 try:
1082 self.delConfig(VDI.DB_VDI_RELINKING)
1083 except XenAPI.Failure as e:
1084 if not util.isInvalidVDI(e):
1085 raise
1086 self.refresh()
1088 def _tagChildrenForRelink(self):
1089 if len(self.children) == 0:
1090 retries = 0
1091 try:
1092 while retries < 15:
1093 retries += 1
1094 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
1095 Util.log("VDI %s is activating, wait to relink" %
1096 self.uuid)
1097 else:
1098 self.setConfig(VDI.DB_VDI_RELINKING, "True")
1100 if self.getConfig(VDI.DB_VDI_ACTIVATING):
1101 self.delConfig(VDI.DB_VDI_RELINKING)
1102 Util.log("VDI %s started activating while tagging" %
1103 self.uuid)
1104 else:
1105 return
1106 time.sleep(2)
1108 raise util.SMException("Failed to tag vdi %s for relink" % self)
1109 except XenAPI.Failure as e:
1110 if not util.isInvalidVDI(e):
1111 raise
1113 for child in self.children:
1114 child._tagChildrenForRelink()
1116 def _loadInfoParent(self):
1117 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
1118 if ret:
1119 self.parentUuid = ret
1121 def _setParent(self, parent) -> None:
1122 self.cowutil.setParent(self.path, parent.path, False)
1123 self.parent = parent
1124 self.parentUuid = parent.uuid
1125 parent.children.append(self)
1126 try:
1127 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1128 Util.log("Updated the vhd-parent field for child %s with %s" % \
1129 (self.uuid, self.parentUuid))
1130 except:
1131 Util.log("Failed to update %s with vhd-parent field %s" % \
1132 (self.uuid, self.parentUuid))
1134 def isHidden(self) -> bool:
1135 if self._hidden is None: 1135 ↛ 1136line 1135 didn't jump to line 1136, because the condition on line 1135 was never true
1136 self._loadInfoHidden()
1137 return self._hidden
1139 def _loadInfoHidden(self) -> None:
1140 hidden = self.cowutil.getHidden(self.path)
1141 self._hidden = (hidden != 0)
1143 def _setHidden(self, hidden=True) -> None:
1144 self._hidden = None
1145 self.cowutil.setHidden(self.path, hidden)
1146 self._hidden = hidden
1148 def _increaseSizeVirt(self, size, atomic=True) -> None:
1149 """ensure the virtual size of 'self' is at least 'size'. Note that
1150 resizing a COW image must always be offline and atomically: the file must
1151 not be open by anyone and no concurrent operations may take place.
1152 Thus we use the Agent API call for performing paused atomic
1153 operations. If the caller is already in the atomic context, it must
1154 call with atomic = False"""
1155 if self.sizeVirt >= size: 1155 ↛ 1157line 1155 didn't jump to line 1157, because the condition on line 1155 was never false
1156 return
1157 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1158 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1160 msize = self.cowutil.getMaxResizeSize(self.path)
1161 if (size <= msize):
1162 self.cowutil.setSizeVirtFast(self.path, size)
1163 else:
1164 if atomic:
1165 vdiList = self._getAllSubtree()
1166 self.sr.lock()
1167 try:
1168 self.sr.pauseVDIs(vdiList)
1169 try:
1170 self._setSizeVirt(size)
1171 finally:
1172 self.sr.unpauseVDIs(vdiList)
1173 finally:
1174 self.sr.unlock()
1175 else:
1176 self._setSizeVirt(size)
1178 self.sizeVirt = self.cowutil.getSizeVirt(self.path)
1180 def _setSizeVirt(self, size) -> None:
1181 """WARNING: do not call this method directly unless all VDIs in the
1182 subtree are guaranteed to be unplugged (and remain so for the duration
1183 of the operation): this operation is only safe for offline COW images"""
1184 jFile = os.path.join(self.sr.path, self.uuid)
1185 self.cowutil.setSizeVirt(self.path, size, jFile)
1187 def _queryCowBlocks(self) -> bytes:
1188 return self.cowutil.getBlockBitmap(self.path)
1190 def _getCoalescedSizeData(self):
1191 """Get the data size of the resulting image if we coalesce self onto
1192 parent. We calculate the actual size by using the image block allocation
1193 information (as opposed to just adding up the two image sizes to get an
1194 upper bound)"""
1195 # make sure we don't use stale BAT info from vdi_rec since the child
1196 # was writable all this time
1197 self.delConfig(VDI.DB_VDI_BLOCKS)
1198 blocksChild = self.getVDIBlocks()
1199 blocksParent = self.parent.getVDIBlocks()
1200 numBlocks = Util.countBits(blocksChild, blocksParent)
1201 Util.log("Num combined blocks = %d" % numBlocks)
1202 sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
1203 assert(sizeData <= self.sizeVirt)
1204 return sizeData
1206 def _calcExtraSpaceForCoalescing(self) -> int:
1207 sizeData = self._getCoalescedSizeData()
1208 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
1209 self.cowutil.calcOverheadEmpty(self.sizeVirt)
1210 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1211 return sizeCoalesced - self.parent.getSizePhys()
1213 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1214 """How much extra space in the SR will be required to
1215 [live-]leaf-coalesce this VDI"""
1216 # the space requirements are the same as for inline coalesce
1217 return self._calcExtraSpaceForCoalescing()
1219 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1220 """How much extra space in the SR will be required to
1221 snapshot-coalesce this VDI"""
1222 return self._calcExtraSpaceForCoalescing() + \
1223 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
1225 def _getAllSubtree(self):
1226 """Get self and all VDIs in the subtree of self as a flat list"""
1227 vdiList = [self]
1228 for child in self.children:
1229 vdiList.extend(child._getAllSubtree())
1230 return vdiList
1233class FileVDI(VDI):
1234 """Object representing a VDI in a file-based SR (EXT or NFS)"""
1236 @override
1237 @staticmethod
1238 def extractUuid(path):
1239 fileName = os.path.basename(path)
1240 return os.path.splitext(fileName)[0]
1242 def __init__(self, sr, uuid, vdi_type):
1243 VDI.__init__(self, sr, uuid, vdi_type)
1244 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1246 @override
1247 def load(self, info=None) -> None:
1248 if not info:
1249 if not util.pathexists(self.path):
1250 raise util.SMException("%s not found" % self.path)
1251 try:
1252 info = self.cowutil.getInfo(self.path, self.extractUuid)
1253 except util.SMException:
1254 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
1255 return
1256 self.parent = None
1257 self.children = []
1258 self.parentUuid = info.parentUuid
1259 self.sizeVirt = info.sizeVirt
1260 self._sizePhys = info.sizePhys
1261 self._sizeAllocated = info.sizeAllocated
1262 self._hidden = info.hidden
1263 self.scanError = False
1264 self.path = os.path.join(self.sr.path, "%s%s" % \
1265 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))
1267 @override
1268 def rename(self, uuid) -> None:
1269 oldPath = self.path
1270 VDI.rename(self, uuid)
1271 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1272 self.path = os.path.join(self.sr.path, self.fileName)
1273 assert(not util.pathexists(self.path))
1274 Util.log("Renaming %s -> %s" % (oldPath, self.path))
1275 os.rename(oldPath, self.path)
1277 @override
1278 def delete(self) -> None:
1279 if len(self.children) > 0: 1279 ↛ 1280line 1279 didn't jump to line 1280, because the condition on line 1279 was never true
1280 raise util.SMException("VDI %s has children, can't delete" % \
1281 self.uuid)
1282 try:
1283 self.sr.lock()
1284 try:
1285 os.unlink(self.path)
1286 self.sr.forgetVDI(self.uuid)
1287 finally:
1288 self.sr.unlock()
1289 except OSError:
1290 raise util.SMException("os.unlink(%s) failed" % self.path)
1291 VDI.delete(self)
1293 @override
1294 def getAllocatedSize(self) -> int:
1295 if self._sizeAllocated == -1: 1295 ↛ 1296line 1295 didn't jump to line 1296, because the condition on line 1295 was never true
1296 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1297 return self._sizeAllocated
1300class LVMVDI(VDI):
1301 """Object representing a VDI in an LVM SR"""
1303 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent
1305 @override
1306 def load(self, info=None) -> None:
1307 # `info` is always set. `None` default value is only here to match parent method.
1308 assert info, "No info given to LVMVDI.load"
1309 self.parent = None
1310 self.children = []
1311 self._sizePhys = -1
1312 self._sizeAllocated = -1
1313 self.scanError = info.scanError
1314 self.sizeLV = info.sizeLV
1315 self.sizeVirt = info.sizeVirt
1316 self.fileName = info.lvName
1317 self.lvActive = info.lvActive
1318 self.lvOpen = info.lvOpen
1319 self.lvReadonly = info.lvReadonly
1320 self._hidden = info.hidden
1321 self.parentUuid = info.parentUuid
1322 self.path = os.path.join(self.sr.path, self.fileName)
1323 self.lvmcowutil = LvmCowUtil(self.cowutil)
1325 @override
1326 @staticmethod
1327 def extractUuid(path):
1328 return LvmCowUtil.extractUuid(path)
1330 def inflate(self, size):
1331 """inflate the LV containing the COW image to 'size'"""
1332 if not VdiType.isCowImage(self.vdi_type):
1333 return
1334 self._activate()
1335 self.sr.lock()
1336 try:
1337 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
1338 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
1339 finally:
1340 self.sr.unlock()
1341 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1342 self._sizePhys = -1
1343 self._sizeAllocated = -1
1345 def deflate(self):
1346 """deflate the LV containing the image to minimum"""
1347 if not VdiType.isCowImage(self.vdi_type):
1348 return
1349 self._activate()
1350 self.sr.lock()
1351 try:
1352 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
1353 finally:
1354 self.sr.unlock()
1355 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1356 self._sizePhys = -1
1357 self._sizeAllocated = -1
1359 def inflateFully(self):
1360 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))
1362 def inflateParentForCoalesce(self):
1363 """Inflate the parent only as much as needed for the purposes of
1364 coalescing"""
1365 if not VdiType.isCowImage(self.parent.vdi_type):
1366 return
1367 inc = self._calcExtraSpaceForCoalescing()
1368 if inc > 0:
1369 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
1370 self.parent.inflate(self.parent.sizeLV + inc)
1372 @override
1373 def updateBlockInfo(self) -> Optional[str]:
1374 if VdiType.isCowImage(self.vdi_type):
1375 return VDI.updateBlockInfo(self)
1376 return None
1378 @override
1379 def rename(self, uuid) -> None:
1380 oldUuid = self.uuid
1381 oldLVName = self.fileName
1382 VDI.rename(self, uuid)
1383 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
1384 self.path = os.path.join(self.sr.path, self.fileName)
1385 assert(not self.sr.lvmCache.checkLV(self.fileName))
1387 self.sr.lvmCache.rename(oldLVName, self.fileName)
1388 if self.sr.lvActivator.get(oldUuid, False):
1389 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)
1391 ns = NS_PREFIX_LVM + self.sr.uuid
1392 (cnt, bcnt) = RefCounter.check(oldUuid, ns)
1393 RefCounter.set(self.uuid, cnt, bcnt, ns)
1394 RefCounter.reset(oldUuid, ns)
1396 @override
1397 def delete(self) -> None:
1398 if len(self.children) > 0:
1399 raise util.SMException("VDI %s has children, can't delete" % \
1400 self.uuid)
1401 self.sr.lock()
1402 try:
1403 self.sr.lvmCache.remove(self.fileName)
1404 self.sr.forgetVDI(self.uuid)
1405 finally:
1406 self.sr.unlock()
1407 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
1408 VDI.delete(self)
1410 @override
1411 def getSizePhys(self) -> int:
1412 if self._sizePhys == -1:
1413 self._loadInfoSizePhys()
1414 return self._sizePhys
1416 def _loadInfoSizePhys(self):
1417 """Get the physical utilization of the COW image file. We do it individually
1418 (and not using the COW batch scanner) as an optimization: this info is
1419 relatively expensive and we need it only for VDI's involved in
1420 coalescing."""
1421 if not VdiType.isCowImage(self.vdi_type):
1422 return
1423 self._activate()
1424 self._sizePhys = self.cowutil.getSizePhys(self.path)
1425 if self._sizePhys <= 0:
1426 raise util.SMException("phys size of %s = %d" % \
1427 (self, self._sizePhys))
1429 @override
1430 def getAllocatedSize(self) -> int:
1431 if self._sizeAllocated == -1:
1432 self._loadInfoSizeAllocated()
1433 return self._sizeAllocated
1435 def _loadInfoSizeAllocated(self):
1436 """
1437 Get the allocated size of the COW volume.
1438 """
1439 if not VdiType.isCowImage(self.vdi_type):
1440 return
1441 self._activate()
1442 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1444 @override
1445 def _loadInfoHidden(self) -> None:
1446 if not VdiType.isCowImage(self.vdi_type):
1447 self._hidden = self.sr.lvmCache.getHidden(self.fileName)
1448 else:
1449 VDI._loadInfoHidden(self)
1451 @override
1452 def _setHidden(self, hidden=True) -> None:
1453 if not VdiType.isCowImage(self.vdi_type):
1454 self._hidden = None
1455 self.sr.lvmCache.setHidden(self.fileName, hidden)
1456 self._hidden = hidden
1457 else:
1458 VDI._setHidden(self, hidden)
1460 @override
1461 def __str__(self) -> str:
1462 strType = self.vdi_type
1463 if self.vdi_type == VdiType.RAW:
1464 strType = "RAW"
1465 strHidden = ""
1466 if self.isHidden():
1467 strHidden = "*"
1468 strSizePhys = ""
1469 if self._sizePhys > 0:
1470 strSizePhys = Util.num2str(self._sizePhys)
1471 strSizeAllocated = ""
1472 if self._sizeAllocated >= 0:
1473 strSizeAllocated = Util.num2str(self._sizeAllocated)
1474 strActive = "n"
1475 if self.lvActive:
1476 strActive = "a"
1477 if self.lvOpen:
1478 strActive += "o"
1479 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
1480 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
1481 Util.num2str(self.sizeLV), strActive)
1483 @override
1484 def validate(self, fast=False) -> None:
1485 if VdiType.isCowImage(self.vdi_type):
1486 VDI.validate(self, fast)
1488 def _setChainRw(self) -> List[str]:
1489 """
1490 Set the readonly LV and children writable.
1491 It's needed because the coalesce can be done by tapdisk directly
1492 and it will need to write parent information for children.
1493 The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part.
1494 Return a list of the LV that were previously readonly to be made RO again after the coalesce.
1495 """
1496 was_ro = []
1497 if self.lvReadonly:
1498 self.sr.lvmCache.setReadonly(self.fileName, False)
1499 was_ro.append(self.fileName)
1501 for child in self.children:
1502 if child.lvReadonly:
1503 self.sr.lvmCache.setReadonly(child.fileName, False)
1504 was_ro.append(child.fileName)
1506 return was_ro
1508 def _setChainRo(self, was_ro: List[str]) -> None:
1509 """Set the list of LV in parameters to readonly"""
1510 for lvName in was_ro:
1511 self.sr.lvmCache.setReadonly(lvName, True)
1513 @override
1514 def _doCoalesce(self) -> None:
1515 """LVMVDI parents must first be activated, inflated, and made writable"""
1516 was_ro = []
1517 try:
1518 self._activateChain()
1519 self.sr.lvmCache.setReadonly(self.parent.fileName, False)
1520 self.parent.validate()
1521 self.inflateParentForCoalesce()
1522 was_ro = self._setChainRw()
1523 VDI._doCoalesce(self)
1524 finally:
1525 self.parent._loadInfoSizePhys()
1526 self.parent.deflate()
1527 self.sr.lvmCache.setReadonly(self.parent.fileName, True)
1528 self._setChainRo(was_ro)
1530 @override
1531 def _setParent(self, parent) -> None:
1532 self._activate()
1533 if self.lvReadonly:
1534 self.sr.lvmCache.setReadonly(self.fileName, False)
1536 try:
1537 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
1538 finally:
1539 if self.lvReadonly:
1540 self.sr.lvmCache.setReadonly(self.fileName, True)
1541 self._deactivate()
1542 self.parent = parent
1543 self.parentUuid = parent.uuid
1544 parent.children.append(self)
1545 try:
1546 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1547 Util.log("Updated the VDI-parent field for child %s with %s" % \
1548 (self.uuid, self.parentUuid))
1549 except:
1550 Util.log("Failed to update the VDI-parent with %s for child %s" % \
1551 (self.parentUuid, self.uuid))
1553 def _activate(self):
1554 self.sr.lvActivator.activate(self.uuid, self.fileName, False)
1556 def _activateChain(self):
1557 vdi = self
1558 while vdi:
1559 vdi._activate()
1560 vdi = vdi.parent
1562 def _deactivate(self):
1563 self.sr.lvActivator.deactivate(self.uuid, False)
1565 @override
1566 def _increaseSizeVirt(self, size, atomic=True) -> None:
1567 "ensure the virtual size of 'self' is at least 'size'"
1568 self._activate()
1569 if VdiType.isCowImage(self.vdi_type):
1570 VDI._increaseSizeVirt(self, size, atomic)
1571 return
1573 # raw VDI case
1574 offset = self.sizeLV
1575 if self.sizeVirt < size:
1576 oldSize = self.sizeLV
1577 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
1578 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
1579 self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
1580 offset = oldSize
1581 unfinishedZero = False
1582 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
1583 if jval:
1584 unfinishedZero = True
1585 offset = int(jval)
1586 length = self.sizeLV - offset
1587 if not length:
1588 return
1590 if unfinishedZero:
1591 Util.log(" ==> Redoing unfinished zeroing out")
1592 else:
1593 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
1594 str(offset))
1595 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1596 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1597 func = lambda: util.zeroOut(self.path, offset, length)
1598 Util.runAbortable(func, True, self.sr.uuid, abortTest,
1599 VDI.POLL_INTERVAL, 0)
1600 self.sr.journaler.remove(self.JRN_ZERO, self.uuid)
1602 @override
1603 def _setSizeVirt(self, size) -> None:
1604 """WARNING: do not call this method directly unless all VDIs in the
1605 subtree are guaranteed to be unplugged (and remain so for the duration
1606 of the operation): this operation is only safe for offline COW images."""
1607 self._activate()
1608 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
1609 try:
1610 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
1611 finally:
1612 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)
1614 @override
1615 def _queryCowBlocks(self) -> bytes:
1616 self._activate()
1617 return VDI._queryCowBlocks(self)
1619 @override
1620 def _calcExtraSpaceForCoalescing(self) -> int:
1621 if not VdiType.isCowImage(self.parent.vdi_type):
1622 return 0 # raw parents are never deflated in the first place
1623 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
1624 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1625 return sizeCoalesced - self.parent.sizeLV
1627 @override
1628 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1629 """How much extra space in the SR will be required to
1630 [live-]leaf-coalesce this VDI"""
1631 # we can deflate the leaf to minimize the space requirements
1632 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
1633 return self._calcExtraSpaceForCoalescing() - deflateDiff
1635 @override
1636 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1637 return self._calcExtraSpaceForCoalescing() + \
1638 lvutil.calcSizeLV(self.getSizePhys())
1641class LinstorVDI(VDI):
1642 """Object representing a VDI in a LINSTOR SR"""
1644 VOLUME_LOCK_TIMEOUT = 30
1646 @override
1647 def load(self, info=None) -> None:
1648 self.parentUuid = info.parentUuid
1649 self.scanError = True
1650 self.parent = None
1651 self.children = []
1653 self.fileName = self.sr._linstor.get_volume_name(self.uuid)
1654 self.path = self.sr._linstor.build_device_path(self.fileName)
1655 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)
1657 if not info:
1658 try:
1659 info = self.linstorcowutil.get_info(self.uuid)
1660 except util.SMException:
1661 Util.log(
1662 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
1663 )
1664 return
1666 self.parentUuid = info.parentUuid
1667 self.sizeVirt = info.sizeVirt
1668 self._sizePhys = -1
1669 self._sizeAllocated = -1
1670 self.drbd_size = -1
1671 self._hidden = info.hidden
1672 self.scanError = False
1674 @override
1675 def getSizePhys(self, fetch=False) -> int:
1676 if self._sizePhys < 0 or fetch:
1677 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid)
1678 return self._sizePhys
1680 def getDrbdSize(self, fetch=False):
1681 if self.drbd_size < 0 or fetch:
1682 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid)
1683 return self.drbd_size
1685 @override
1686 def getAllocatedSize(self) -> int:
1687 if self._sizeAllocated == -1:
1688 if VdiType.isCowImage(self.vdi_type):
1689 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid)
1690 return self._sizeAllocated
1692 def inflate(self, size):
1693 if not VdiType.isCowImage(self.vdi_type):
1694 return
1695 self.sr.lock()
1696 try:
1697 # Ensure we use the real DRBD size and not the cached one.
1698 # Why? Because this attribute can be changed if volume is resized by user.
1699 self.drbd_size = self.getDrbdSize(fetch=True)
1700 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
1701 finally:
1702 self.sr.unlock()
1703 self.drbd_size = -1
1704 self._sizePhys = -1
1705 self._sizeAllocated = -1
1707 def deflate(self):
1708 if not VdiType.isCowImage(self.vdi_type):
1709 return
1710 self.sr.lock()
1711 try:
1712 # Ensure we use the real sizes and not the cached info.
1713 self.drbd_size = self.getDrbdSize(fetch=True)
1714 self._sizePhys = self.getSizePhys(fetch=True)
1715 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
1716 finally:
1717 self.sr.unlock()
1718 self.drbd_size = -1
1719 self._sizePhys = -1
1720 self._sizeAllocated = -1
1722 def inflateFully(self):
1723 if VdiType.isCowImage(self.vdi_type):
1724 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt))
1726 @override
1727 def rename(self, uuid) -> None:
1728 Util.log('Renaming {} -> {} (path={})'.format(
1729 self.uuid, uuid, self.path
1730 ))
1731 self.sr._linstor.update_volume_uuid(self.uuid, uuid)
1732 VDI.rename(self, uuid)
1734 @override
1735 def delete(self) -> None:
1736 if len(self.children) > 0:
1737 raise util.SMException(
1738 'VDI {} has children, can\'t delete'.format(self.uuid)
1739 )
1740 self.sr.lock()
1741 try:
1742 self.sr._linstor.destroy_volume(self.uuid)
1743 self.sr.forgetVDI(self.uuid)
1744 finally:
1745 self.sr.unlock()
1746 VDI.delete(self)
1748 @override
1749 def validate(self, fast=False) -> None:
1750 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success:
1751 raise util.SMException('COW image {} corrupted'.format(self))
1753 @override
1754 def pause(self, failfast=False) -> None:
1755 self.sr._linstor.ensure_volume_is_not_locked(
1756 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1757 )
1758 return super(LinstorVDI, self).pause(failfast)
1760 @override
1761 def coalesce(self) -> int:
1762 # Note: We raise `SMException` here to skip the current coalesce in case of failure.
1763 # Using another exception we can't execute the next coalesce calls.
1764 return self.linstorcowutil.force_coalesce(self.path)
1766 @override
1767 def getParent(self) -> str:
1768 return self.linstorcowutil.get_parent(
1769 self.sr._linstor.get_volume_uuid_from_device_path(self.path)
1770 )
1772 @override
1773 def repair(self, parent_uuid) -> None:
1774 self.linstorcowutil.force_repair(
1775 self.sr._linstor.get_device_path(parent_uuid)
1776 )
1778 @override
1779 def _relinkSkip(self) -> None:
1780 abortFlag = IPCFlag(self.sr.uuid)
1781 for child in self.children:
1782 if abortFlag.test(FLAG_TYPE_ABORT):
1783 raise AbortException('Aborting due to signal')
1784 Util.log(
1785 ' Relinking {} from {} to {}'.format(
1786 child, self, self.parent
1787 )
1788 )
1790 session = child.sr.xapi.session
1791 sr_uuid = child.sr.uuid
1792 vdi_uuid = child.uuid
1793 try:
1794 self.sr._linstor.ensure_volume_is_not_locked(
1795 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1796 )
1797 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
1798 child._setParent(self.parent)
1799 finally:
1800 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
1801 self.children = []
1803 @override
1804 def _setParent(self, parent) -> None:
1805 self.sr._linstor.get_device_path(self.uuid)
1806 self.linstorcowutil.force_parent(self.path, parent.path)
1807 self.parent = parent
1808 self.parentUuid = parent.uuid
1809 parent.children.append(self)
1810 try:
1811 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1812 Util.log("Updated the vhd-parent field for child %s with %s" % \
1813 (self.uuid, self.parentUuid))
1814 except:
1815 Util.log("Failed to update %s with vhd-parent field %s" % \
1816 (self.uuid, self.parentUuid))
1818 @override
1819 def _doCoalesce(self) -> None:
1820 try:
1821 self._activateChain()
1822 self.parent.validate()
1823 self._inflateParentForCoalesce()
1824 VDI._doCoalesce(self)
1825 finally:
1826 self.parent.deflate()
1828 def _activateChain(self):
1829 vdi = self
1830 while vdi:
1831 try:
1832 p = self.sr._linstor.get_device_path(vdi.uuid)
1833 except Exception as e:
1834 # Use SMException to skip coalesce.
1835 # Otherwise the GC is stopped...
1836 raise util.SMException(str(e))
1837 vdi = vdi.parent
1839 @override
1840 def _setHidden(self, hidden=True) -> None:
1841 HIDDEN_TAG = 'hidden'
1843 if not VdiType.isCowImage(self.vdi_type):
1844 self._hidden = None
1845 self.sr._linstor.update_volume_metadata(self.uuid, {
1846 HIDDEN_TAG: hidden
1847 })
1848 self._hidden = hidden
1849 else:
1850 VDI._setHidden(self, hidden)
1852 @override
1853 def _increaseSizeVirt(self, size, atomic=True):
1854 if self.vdi_type == VdiType.RAW:
1855 offset = self.drbd_size
1856 if self.sizeVirt < size:
1857 oldSize = self.drbd_size
1858 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
1859 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
1860 self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
1861 offset = oldSize
1862 unfinishedZero = False
1863 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
1864 if jval:
1865 unfinishedZero = True
1866 offset = int(jval)
1867 length = self.drbd_size - offset
1868 if not length:
1869 return
1871 if unfinishedZero:
1872 Util.log(" ==> Redoing unfinished zeroing out")
1873 else:
1874 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
1875 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1876 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1877 func = lambda: util.zeroOut(self.path, offset, length)
1878 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
1879 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
1880 return
1882 if self.sizeVirt >= size:
1883 return
1884 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1885 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1887 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
1888 if (size <= msize):
1889 self.linstorcowutil.set_size_virt_fast(self.path, size)
1890 else:
1891 if atomic:
1892 vdiList = self._getAllSubtree()
1893 self.sr.lock()
1894 try:
1895 self.sr.pauseVDIs(vdiList)
1896 try:
1897 self._setSizeVirt(size)
1898 finally:
1899 self.sr.unpauseVDIs(vdiList)
1900 finally:
1901 self.sr.unlock()
1902 else:
1903 self._setSizeVirt(size)
1905 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)
1907 @override
1908 def _setSizeVirt(self, size) -> None:
1909 jfile = self.uuid + '-jvhd'
1910 self.sr._linstor.create_volume(
1911 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
1912 )
1913 try:
1914 self.inflate(self.linstorcowutil.compute_volume_size(size))
1915 self.linstorcowutil.set_size_virt(self.path, size, jfile)
1916 finally:
1917 try:
1918 self.sr._linstor.destroy_volume(jfile)
1919 except Exception:
1920 # We can ignore it, in any case this volume is not persistent.
1921 pass
1923 @override
1924 def _queryCowBlocks(self) -> bytes:
1925 return self.linstorcowutil.get_block_bitmap(self.uuid)
1927 def _inflateParentForCoalesce(self):
1928 if not VdiType.isCowImage(self.parent.vdi_type):
1929 return
1930 inc = self._calcExtraSpaceForCoalescing()
1931 if inc > 0:
1932 self.parent.inflate(self.parent.getDrbdSize() + inc)
1934 @override
1935 def _calcExtraSpaceForCoalescing(self) -> int:
1936 if not VdiType.isCowImage(self.parent.vdi_type):
1937 return 0
1938 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
1939 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
1940 return size_coalesced - self.parent.getDrbdSize()
1942 @override
1943 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1944 assert self.getDrbdSize() > 0
1945 assert self.getSizePhys() > 0
1946 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1947 assert deflate_diff >= 0
1948 return self._calcExtraSpaceForCoalescing() - deflate_diff
1950 @override
1951 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1952 assert self.getSizePhys() > 0
1953 return self._calcExtraSpaceForCoalescing() + \
1954 LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1956################################################################################
1957#
1958# SR
1959#
1960class SR(object):
1961 class LogFilter:
1962 def __init__(self, sr):
1963 self.sr = sr
1964 self.stateLogged = False
1965 self.prevState = {}
1966 self.currState = {}
1968 def logState(self):
1969 changes = ""
1970 self.currState.clear()
1971 for vdi in self.sr.vdiTrees:
1972 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1973 if not self.prevState.get(vdi.uuid) or \
1974 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1975 changes += self.currState[vdi.uuid]
1977 for uuid in self.prevState:
1978 if not self.currState.get(uuid):
1979 changes += "Tree %s gone\n" % uuid
1981 result = "SR %s (%d VDIs in %d COW trees): " % \
1982 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1984 if len(changes) > 0:
1985 if self.stateLogged:
1986 result += "showing only COW trees that changed:"
1987 result += "\n%s" % changes
1988 else:
1989 result += "no changes"
1991 for line in result.split("\n"):
1992 Util.log("%s" % line)
1993 self.prevState.clear()
1994 for key, val in self.currState.items():
1995 self.prevState[key] = val
1996 self.stateLogged = True
1998 def logNewVDI(self, uuid):
1999 if self.stateLogged:
2000 Util.log("Found new VDI when scanning: %s" % uuid)
2002 def _getTreeStr(self, vdi, indent=8):
2003 treeStr = "%s%s\n" % (" " * indent, vdi)
2004 for child in vdi.children:
2005 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
2006 return treeStr
2008 TYPE_FILE = "file"
2009 TYPE_LVHD = "lvhd"
2010 TYPE_LINSTOR = "linstor"
2011 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]
2013 LOCK_RETRY_INTERVAL = 3
2014 LOCK_RETRY_ATTEMPTS = 20
2015 LOCK_RETRY_ATTEMPTS_LOCK = 100
2017 SCAN_RETRY_ATTEMPTS = 3
2019 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM)
2020 TMP_RENAME_PREFIX = "OLD_"
2022 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
2023 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
2025 @staticmethod
2026 def getInstance(uuid, xapiSession, createLock=True, force=False):
2027 xapi = XAPI(xapiSession, uuid)
2028 type = normalizeType(xapi.srRecord["type"])
2029 if type == SR.TYPE_FILE:
2030 return FileSR(uuid, xapi, createLock, force)
2031 elif type == SR.TYPE_LVHD:
2032 return LVMSR(uuid, xapi, createLock, force)
2033 elif type == SR.TYPE_LINSTOR:
2034 return LinstorSR(uuid, xapi, createLock, force)
2035 raise util.SMException("SR type %s not recognized" % type)
2037 def __init__(self, uuid, xapi, createLock, force):
2038 self.logFilter = self.LogFilter(self)
2039 self.uuid = uuid
2040 self.path = ""
2041 self.name = ""
2042 self.vdis = {}
2043 self.vdiTrees = []
2044 self.journaler = None
2045 self.xapi = xapi
2046 self._locked = 0
2047 self._srLock = None
2048 if createLock: 2048 ↛ 2049line 2048 didn't jump to line 2049, because the condition on line 2048 was never true
2049 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
2050 else:
2051 Util.log("Requested no SR locking")
2052 self.name = self.xapi.srRecord["name_label"]
2053 self._failedCoalesceTargets = []
2055 if not self.xapi.isPluggedHere():
2056 if force: 2056 ↛ 2057line 2056 didn't jump to line 2057, because the condition on line 2056 was never true
2057 Util.log("SR %s not attached on this host, ignoring" % uuid)
2058 else:
2059 if not self.wait_for_plug():
2060 raise util.SMException("SR %s not attached on this host" % uuid)
2062 if force: 2062 ↛ 2063line 2062 didn't jump to line 2063, because the condition on line 2062 was never true
2063 Util.log("Not checking if we are Master (SR %s)" % uuid)
2064 elif not self.xapi.isMaster(): 2064 ↛ 2065line 2064 didn't jump to line 2065, because the condition on line 2064 was never true
2065 raise util.SMException("This host is NOT master, will not run")
2067 self.no_space_candidates = {}
2069 def msg_cleared(self, xapi_session, msg_ref):
2070 try:
2071 msg = xapi_session.xenapi.message.get_record(msg_ref)
2072 except XenAPI.Failure:
2073 return True
2075 return msg is None
2077 def check_no_space_candidates(self):
2078 xapi_session = self.xapi.getSession()
2080 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
2081 if self.no_space_candidates:
2082 if msg_id is None or self.msg_cleared(xapi_session, msg_id):
2083 util.SMlog("Could not coalesce due to a lack of space "
2084 f"in SR {self.uuid}")
2085 msg_body = ("Unable to perform data coalesce due to a lack "
2086 f"of space in SR {self.uuid}")
2087 msg_id = xapi_session.xenapi.message.create(
2088 'SM_GC_NO_SPACE',
2089 3,
2090 "SR",
2091 self.uuid,
2092 msg_body)
2093 xapi_session.xenapi.SR.remove_from_sm_config(
2094 self.xapi.srRef, VDI.DB_GC_NO_SPACE)
2095 xapi_session.xenapi.SR.add_to_sm_config(
2096 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)
2098 for candidate in self.no_space_candidates.values():
2099 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
2100 elif msg_id is not None:
2101 # Everything was coalescable, remove the message
2102 xapi_session.xenapi.message.destroy(msg_id)
2104 def clear_no_space_msg(self, vdi):
2105 msg_id = None
2106 try:
2107 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2108 except XenAPI.Failure:
2109 pass
2111 self.no_space_candidates.pop(vdi.uuid, None)
2112 if msg_id is not None: 2112 ↛ exitline 2112 didn't return from function 'clear_no_space_msg', because the condition on line 2112 was never false
2113 vdi.delConfig(VDI.DB_GC_NO_SPACE)
2116 def wait_for_plug(self):
2117 for _ in range(1, 10):
2118 time.sleep(2)
2119 if self.xapi.isPluggedHere():
2120 return True
2121 return False
2123 def gcEnabled(self, refresh=True):
2124 if refresh:
2125 self.xapi.srRecord = \
2126 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2127 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2128 Util.log("GC is disabled for this SR, abort")
2129 return False
2130 return True
2132 def scan(self, force=False) -> None:
2133 """Scan the SR and load VDI info for each VDI. If called repeatedly,
2134 update VDI objects if they already exist"""
2135 pass
2137 def scanLocked(self, force=False):
2138 self.lock()
2139 try:
2140 self.scan(force)
2141 finally:
2142 self.unlock()
2144 def getVDI(self, uuid):
2145 return self.vdis.get(uuid)
2147 def hasWork(self):
2148 if len(self.findGarbage()) > 0:
2149 return True
2150 if self.findCoalesceable():
2151 return True
2152 if self.findLeafCoalesceable():
2153 return True
2154 if self.needUpdateBlockInfo():
2155 return True
2156 return False
2158 def findCoalesceable(self):
2159 """Find a coalesceable VDI. Return a vdi that should be coalesced
2160 (choosing one among all coalesceable candidates according to some
2161 criteria) or None if there is no VDI that could be coalesced"""
2163 candidates = []
2165 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
2166 if srSwitch == "false":
2167 Util.log("Coalesce disabled for this SR")
2168 return candidates
2170 # finish any VDI for which a relink journal entry exists first
2171 journals = self.journaler.getAll(VDI.JRN_RELINK)
2172 for uuid in journals:
2173 vdi = self.getVDI(uuid)
2174 if vdi and vdi not in self._failedCoalesceTargets:
2175 return vdi
2177 for vdi in self.vdis.values():
2178 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
2179 candidates.append(vdi)
2180 Util.log("%s is coalescable" % vdi.uuid)
2182 self.xapi.update_task_progress("coalescable", len(candidates))
2184 # pick one in the tallest tree
2185 treeHeight = dict()
2186 for c in candidates:
2187 height = c.getTreeRoot().getTreeHeight()
2188 if treeHeight.get(height):
2189 treeHeight[height].append(c)
2190 else:
2191 treeHeight[height] = [c]
2193 freeSpace = self.getFreeSpace()
2194 heights = list(treeHeight.keys())
2195 heights.sort(reverse=True)
2196 for h in heights:
2197 for c in treeHeight[h]:
2198 spaceNeeded = c._calcExtraSpaceForCoalescing()
2199 if spaceNeeded <= freeSpace:
2200 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
2201 self.clear_no_space_msg(c)
2202 return c
2203 else:
2204 self.no_space_candidates[c.uuid] = c
2205 Util.log("No space to coalesce %s (free space: %d)" % \
2206 (c, freeSpace))
2207 return None
2209 def getSwitch(self, key):
2210 return self.xapi.srRecord["other_config"].get(key)
2212 def forbiddenBySwitch(self, switch, condition, fail_msg):
2213 srSwitch = self.getSwitch(switch)
2214 ret = False
2215 if srSwitch:
2216 ret = srSwitch == condition
2218 if ret:
2219 Util.log(fail_msg)
2221 return ret
2223 def leafCoalesceForbidden(self):
2224 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2225 "false",
2226 "Coalesce disabled for this SR") or
2227 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2228 VDI.LEAFCLSC_DISABLED,
2229 "Leaf-coalesce disabled for this SR"))
2231 def findLeafCoalesceable(self):
2232 """Find leaf-coalesceable VDIs in each COW tree"""
2234 candidates = []
2235 if self.leafCoalesceForbidden():
2236 return candidates
2238 self.gatherLeafCoalesceable(candidates)
2240 self.xapi.update_task_progress("coalescable", len(candidates))
2242 freeSpace = self.getFreeSpace()
2243 for candidate in candidates:
2244 # check the space constraints to see if leaf-coalesce is actually
2245 # feasible for this candidate
2246 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
2247 spaceNeededLive = spaceNeeded
2248 if spaceNeeded > freeSpace:
2249 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
2250 if candidate.canLiveCoalesce(self.getStorageSpeed()):
2251 spaceNeeded = spaceNeededLive
2253 if spaceNeeded <= freeSpace:
2254 Util.log("Leaf-coalesce candidate: %s" % candidate)
2255 self.clear_no_space_msg(candidate)
2256 return candidate
2257 else:
2258 Util.log("No space to leaf-coalesce %s (free space: %d)" % \
2259 (candidate, freeSpace))
2260 if spaceNeededLive <= freeSpace:
2261 Util.log("...but enough space if skip snap-coalesce")
2262 candidate.setConfig(VDI.DB_LEAFCLSC,
2263 VDI.LEAFCLSC_OFFLINE)
2264 self.no_space_candidates[candidate.uuid] = candidate
2266 return None
2268 def gatherLeafCoalesceable(self, candidates):
2269 for vdi in self.vdis.values():
2270 if not vdi.isLeafCoalesceable():
2271 continue
2272 if vdi in self._failedCoalesceTargets:
2273 continue
2274 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2275 Util.log("Skipping reset-on-boot %s" % vdi)
2276 continue
2277 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2278 Util.log("Skipping allow_caching=true %s" % vdi)
2279 continue
2280 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2281 Util.log("Leaf-coalesce disabled for %s" % vdi)
2282 continue
2283 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2284 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2285 continue
2286 candidates.append(vdi)
2288 def coalesce(self, vdi, dryRun=False):
2289 """Coalesce vdi onto parent"""
2290 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2291 if dryRun: 2291 ↛ 2292line 2291 didn't jump to line 2292, because the condition on line 2291 was never true
2292 return
2294 try:
2295 self._coalesce(vdi)
2296 except util.SMException as e:
2297 if isinstance(e, AbortException): 2297 ↛ 2298line 2297 didn't jump to line 2298, because the condition on line 2297 was never true
2298 self.cleanup()
2299 raise
2300 else:
2301 self._failedCoalesceTargets.append(vdi)
2302 Util.logException("coalesce")
2303 Util.log("Coalesce failed, skipping")
2304 self.cleanup()
2306 def coalesceLeaf(self, vdi, dryRun=False):
2307 """Leaf-coalesce vdi onto parent"""
2308 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
2309 if dryRun:
2310 return
2312 try:
2313 uuid = vdi.uuid
2314 try:
2315 # "vdi" object will no longer be valid after this call
2316 self._coalesceLeaf(vdi)
2317 finally:
2318 vdi = self.getVDI(uuid)
2319 if vdi:
2320 vdi.delConfig(vdi.DB_LEAFCLSC)
2321 except AbortException:
2322 self.cleanup()
2323 raise
2324 except (util.SMException, XenAPI.Failure) as e:
2325 self._failedCoalesceTargets.append(vdi)
2326 Util.logException("leaf-coalesce")
2327 Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
2328 self.cleanup()
2330 def garbageCollect(self, dryRun=False):
2331 vdiList = self.findGarbage()
2332 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2333 for vdi in vdiList:
2334 Util.log(" %s" % vdi)
2335 if not dryRun:
2336 self.deleteVDIs(vdiList)
2337 self.cleanupJournals(dryRun)
2339 def findGarbage(self):
2340 vdiList = []
2341 for vdi in self.vdiTrees:
2342 vdiList.extend(vdi.getAllPrunable())
2343 return vdiList
2345 def deleteVDIs(self, vdiList) -> None:
2346 for vdi in vdiList:
2347 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2348 raise AbortException("Aborting due to signal")
2349 Util.log("Deleting unlinked VDI %s" % vdi)
2350 self.deleteVDI(vdi)
2352 def deleteVDI(self, vdi) -> None:
2353 assert(len(vdi.children) == 0)
2354 del self.vdis[vdi.uuid]
2355 if vdi.parent: 2355 ↛ 2357line 2355 didn't jump to line 2357, because the condition on line 2355 was never false
2356 vdi.parent.children.remove(vdi)
2357 if vdi in self.vdiTrees: 2357 ↛ 2358line 2357 didn't jump to line 2358, because the condition on line 2357 was never true
2358 self.vdiTrees.remove(vdi)
2359 vdi.delete()
2361 def forgetVDI(self, vdiUuid) -> None:
2362 self.xapi.forgetVDI(self.uuid, vdiUuid)
2364 def pauseVDIs(self, vdiList) -> None:
2365 paused = []
2366 failed = False
2367 for vdi in vdiList:
2368 try:
2369 vdi.pause()
2370 paused.append(vdi)
2371 except:
2372 Util.logException("pauseVDIs")
2373 failed = True
2374 break
2376 if failed:
2377 self.unpauseVDIs(paused)
2378 raise util.SMException("Failed to pause VDIs")
2380 def unpauseVDIs(self, vdiList):
2381 failed = False
2382 for vdi in vdiList:
2383 try:
2384 vdi.unpause()
2385 except:
2386 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2387 failed = True
2388 if failed:
2389 raise util.SMException("Failed to unpause VDIs")
2391 def getFreeSpace(self) -> int:
2392 return 0
2394 def cleanup(self):
2395 Util.log("In cleanup")
2396 return
2398 @override
2399 def __str__(self) -> str:
2400 if self.name:
2401 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2402 else:
2403 ret = "%s" % self.uuid
2404 return ret
2406 def lock(self):
2407 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
2408 signal to avoid deadlocking (trying to acquire the SR lock while the
2409 lock is held by a process that is trying to abort us)"""
2410 if not self._srLock:
2411 return
2413 if self._locked == 0:
2414 abortFlag = IPCFlag(self.uuid)
2415 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
2416 if self._srLock.acquireNoblock():
2417 self._locked += 1
2418 return
2419 if abortFlag.test(FLAG_TYPE_ABORT):
2420 raise AbortException("Abort requested")
2421 time.sleep(SR.LOCK_RETRY_INTERVAL)
2422 raise util.SMException("Unable to acquire the SR lock")
2424 self._locked += 1
2426 def unlock(self):
2427 if not self._srLock: 2427 ↛ 2429line 2427 didn't jump to line 2429, because the condition on line 2427 was never false
2428 return
2429 assert(self._locked > 0)
2430 self._locked -= 1
2431 if self._locked == 0:
2432 self._srLock.release()
2434 def needUpdateBlockInfo(self) -> bool:
2435 for vdi in self.vdis.values():
2436 if vdi.scanError or len(vdi.children) == 0:
2437 continue
2438 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2439 return True
2440 return False
2442 def updateBlockInfo(self) -> None:
2443 for vdi in self.vdis.values():
2444 if vdi.scanError or len(vdi.children) == 0:
2445 continue
2446 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2447 vdi.updateBlockInfo()
2449 def cleanupCoalesceJournals(self):
2450 """Remove stale coalesce VDI indicators"""
2451 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2452 for uuid, jval in entries.items():
2453 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2455 def cleanupJournals(self, dryRun=False):
2456 """delete journal entries for non-existing VDIs"""
2457 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2458 entries = self.journaler.getAll(t)
2459 for uuid, jval in entries.items():
2460 if self.getVDI(uuid):
2461 continue
2462 if t == SR.JRN_CLONE:
2463 baseUuid, clonUuid = jval.split("_")
2464 if self.getVDI(baseUuid):
2465 continue
2466 Util.log(" Deleting stale '%s' journal entry for %s "
2467 "(%s)" % (t, uuid, jval))
2468 if not dryRun:
2469 self.journaler.remove(t, uuid)
2471 def cleanupCache(self, maxAge=-1) -> int:
2472 return 0
2474 def _hasLeavesAttachedOn(self, vdi: VDI):
2475 leaves = vdi.getAllLeaves()
2476 leaves_vdi = [leaf.uuid for leaf in leaves]
2477 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi)
2479 def _gc_running_file(self, vdi: VDI):
2480 run_file = "gc_running_{}".format(vdi.uuid)
2481 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file)
2483 def _create_running_file(self, vdi: VDI):
2484 with open(self._gc_running_file(vdi), "w") as f:
2485 f.write("1")
2487 def _delete_running_file(self, vdi: VDI):
2488 os.unlink(self._gc_running_file(vdi))
2490 def _coalesce(self, vdi: VDI):
2491 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2491 ↛ 2494line 2491 didn't jump to line 2494, because the condition on line 2491 was never true
2492 # this means we had done the actual coalescing already and just
2493 # need to finish relinking and/or refreshing the children
2494 Util.log("==> Coalesce apparently already done: skipping")
2495 else:
2496 # JRN_COALESCE is used to check which VDI is being coalesced in
2497 # order to decide whether to abort the coalesce. We remove the
2498 # journal as soon as the COW coalesce step is done, because we
2499 # don't expect the rest of the process to take long
2501 if os.path.exists(self._gc_running_file(vdi)): 2501 ↛ 2502line 2501 didn't jump to line 2502, because the condition on line 2501 was never true
2502 util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))
2504 self._create_running_file(vdi)
2506 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
2507 host_refs = self._hasLeavesAttachedOn(vdi)
2508 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
2509 if len(host_refs) > 1: 2509 ↛ 2510line 2509 didn't jump to line 2510, because the condition on line 2509 was never true
2510 util.SMlog("Not coalesceable, chain activated more than once")
2511 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error
2513 try:
2514 if host_refs and vdi.cowutil.isCoalesceableOnRemote(): 2514 ↛ 2516line 2514 didn't jump to line 2516, because the condition on line 2514 was never true
2515 #Leaf opened on another host, we need to call online coalesce
2516 util.SMlog("Remote coalesce for {}".format(vdi.path))
2517 vdi._doCoalesceOnHost(list(host_refs)[0])
2518 else:
2519 util.SMlog("Offline coalesce for {}".format(vdi.path))
2520 vdi._doCoalesce()
2521 except Exception as e:
2522 util.SMlog("EXCEPTION while coalescing: {}".format(e))
2523 self._delete_running_file(vdi)
2524 raise
2526 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
2527 self._delete_running_file(vdi)
2529 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)
2531 # we now need to relink the children: lock the SR to prevent ops
2532 # like SM.clone from manipulating the VDIs we'll be relinking and
2533 # rescan the SR first in case the children changed since the last
2534 # scan
2535 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
2537 self.lock()
2538 try:
2539 vdi.parent._tagChildrenForRelink()
2540 self.scan()
2541 vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues
2542 finally:
2543 self.unlock()
2544 # Reload the children to leave things consistent
2545 vdi.parent._reloadChildren(vdi)
2546 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
2548 self.deleteVDI(vdi)
2550 class CoalesceTracker:
2551 GRACE_ITERATIONS = 2
2552 MAX_ITERATIONS_NO_PROGRESS = 3
2553 MAX_ITERATIONS = 10
2554 MAX_INCREASE_FROM_MINIMUM = 1.2
2555 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2556 " --> Final size {finSize}"
2558 def __init__(self, sr):
2559 self.itsNoProgress = 0
2560 self.its = 0
2561 self.minSize = float("inf")
2562 self.history = []
2563 self.reason = ""
2564 self.startSize = None
2565 self.finishSize = None
2566 self.sr = sr
2567 self.grace_remaining = self.GRACE_ITERATIONS
2569 def abortCoalesce(self, prevSize, curSize):
2570 self.its += 1
2571 self.history.append(self.HISTORY_STRING.format(its=self.its,
2572 initSize=prevSize,
2573 finSize=curSize))
2575 self.finishSize = curSize
2577 if self.startSize is None:
2578 self.startSize = prevSize
2580 if curSize < self.minSize:
2581 self.minSize = curSize
2583 if prevSize < self.minSize:
2584 self.minSize = prevSize
2586 if self.its == 1:
2587 # Skip evaluating conditions on first iteration
2588 return False
2590 if prevSize < curSize:
2591 self.itsNoProgress += 1
2592 Util.log("No progress, attempt:"
2593 " {attempt}".format(attempt=self.itsNoProgress))
2594 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2595 else:
2596 # We made progress
2597 return False
2599 if self.its > self.MAX_ITERATIONS:
2600 max = self.MAX_ITERATIONS
2601 self.reason = \
2602 "Max iterations ({max}) exceeded".format(max=max)
2603 return True
2605 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2606 max = self.MAX_ITERATIONS_NO_PROGRESS
2607 self.reason = \
2608 "No progress made for {max} iterations".format(max=max)
2609 return True
2611 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2612 if curSize > maxSizeFromMin:
2613 self.grace_remaining -= 1
2614 if self.grace_remaining == 0:
2615 self.reason = "Unexpected bump in size," \
2616 " compared to minimum achieved"
2618 return True
2620 return False
2622 def printSizes(self):
2623 Util.log("Starting size was {size}"
2624 .format(size=self.startSize))
2625 Util.log("Final size was {size}"
2626 .format(size=self.finishSize))
2627 Util.log("Minimum size achieved was {size}"
2628 .format(size=self.minSize))
2630 def printReasoning(self):
2631 Util.log("Aborted coalesce")
2632 for hist in self.history:
2633 Util.log(hist)
2634 Util.log(self.reason)
2635 self.printSizes()
2637 def printSummary(self):
2638 if self.its == 0:
2639 return
2641 if self.reason: 2641 ↛ 2642line 2641 didn't jump to line 2642, because the condition on line 2641 was never true
2642 Util.log("Aborted coalesce")
2643 Util.log(self.reason)
2644 else:
2645 Util.log("Coalesce summary")
2647 Util.log(f"Performed {self.its} iterations")
2648 self.printSizes()
2651 def _coalesceLeaf(self, vdi):
2652 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
2653 complete due to external changes, namely vdi_delete and vdi_snapshot
2654 that alter leaf-coalescibility of vdi"""
2655 tracker = self.CoalesceTracker(self)
2656 while not vdi.canLiveCoalesce(self.getStorageSpeed()):
2657 prevSizePhys = vdi.getSizePhys()
2658 if not self._snapshotCoalesce(vdi): 2658 ↛ 2659line 2658 didn't jump to line 2659, because the condition on line 2658 was never true
2659 return False
2660 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
2661 tracker.printReasoning()
2662 raise util.SMException("VDI {uuid} could not be coalesced"
2663 .format(uuid=vdi.uuid))
2664 tracker.printSummary()
2665 return self._liveLeafCoalesce(vdi)
2667 def calcStorageSpeed(self, startTime, endTime, coalescedSize):
2668 speed = None
2669 total_time = endTime - startTime
2670 if total_time > 0:
2671 speed = float(coalescedSize) / float(total_time)
2672 return speed
2674 def writeSpeedToFile(self, speed):
2675 content = []
2676 speedFile = None
2677 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2678 self.lock()
2679 try:
2680 Util.log("Writing to file: {myfile}".format(myfile=path))
2681 lines = ""
2682 if not os.path.isfile(path):
2683 lines = str(speed) + "\n"
2684 else:
2685 speedFile = open(path, "r+")
2686 content = speedFile.readlines()
2687 content.append(str(speed) + "\n")
2688 if len(content) > N_RUNNING_AVERAGE:
2689 del content[0]
2690 lines = "".join(content)
2692 util.atomicFileWrite(path, VAR_RUN, lines)
2693 finally:
2694 if speedFile is not None:
2695 speedFile.close()
2696 Util.log("Closing file: {myfile}".format(myfile=path))
2697 self.unlock()
2699 def recordStorageSpeed(self, startTime, endTime, coalescedSize):
2700 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize)
2701 if speed is None:
2702 return
2704 self.writeSpeedToFile(speed)
2706 def getStorageSpeed(self):
2707 speedFile = None
2708 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2709 self.lock()
2710 try:
2711 speed = None
2712 if os.path.isfile(path):
2713 speedFile = open(path)
2714 content = speedFile.readlines()
2715 try:
2716 content = [float(i) for i in content]
2717 except ValueError:
2718 Util.log("Something bad in the speed log:{log}".
2719 format(log=speedFile.readlines()))
2720 return speed
2722 if len(content):
2723 speed = sum(content) / float(len(content))
2724 if speed <= 0: 2724 ↛ 2726line 2724 didn't jump to line 2726, because the condition on line 2724 was never true
2725 # Defensive, should be impossible.
2726 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2727 format(speed=speed, uuid=self.uuid))
2728 speed = None
2729 else:
2730 Util.log("Speed file empty for SR: {uuid}".
2731 format(uuid=self.uuid))
2732 else:
2733 Util.log("Speed log missing for SR: {uuid}".
2734 format(uuid=self.uuid))
2735 return speed
2736 finally:
2737 if not (speedFile is None):
2738 speedFile.close()
2739 self.unlock()
2741 def _snapshotCoalesce(self, vdi):
2742 # Note that because we are not holding any locks here, concurrent SM
2743 # operations may change this tree under our feet. In particular, vdi
2744 # can be deleted, or it can be snapshotted.
2745 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2746 Util.log("Single-snapshotting %s" % vdi)
2747 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2748 try:
2749 ret = self.xapi.singleSnapshotVDI(vdi)
2750 Util.log("Single-snapshot returned: %s" % ret)
2751 except XenAPI.Failure as e:
2752 if util.isInvalidVDI(e):
2753 Util.log("The VDI appears to have been concurrently deleted")
2754 return False
2755 raise
2756 self.scanLocked()
2757 tempSnap = vdi.parent
2758 if not tempSnap.isCoalesceable():
2759 Util.log("The VDI appears to have been concurrently snapshotted")
2760 return False
2761 Util.log("Coalescing parent %s" % tempSnap)
2762 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2763 sizePhys = vdi.getSizePhys()
2764 self._coalesce(tempSnap)
2765 if not vdi.isLeafCoalesceable():
2766 Util.log("The VDI tree appears to have been altered since")
2767 return False
2768 return True
2770 def _liveLeafCoalesce(self, vdi: VDI) -> bool:
2771 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
2772 self.lock()
2773 try:
2774 self.scan()
2775 if not self.getVDI(vdi.uuid):
2776 Util.log("The VDI appears to have been deleted meanwhile")
2777 return False
2778 if not vdi.isLeafCoalesceable():
2779 Util.log("The VDI is no longer leaf-coalesceable")
2780 return False
2782 uuid = vdi.uuid
2783 vdi.pause(failfast=True)
2784 try:
2785 try:
2786 # "vdi" object will no longer be valid after this call
2787 self._create_running_file(vdi)
2788 self._doCoalesceLeaf(vdi)
2789 except:
2790 Util.logException("_doCoalesceLeaf")
2791 self._handleInterruptedCoalesceLeaf()
2792 raise
2793 finally:
2794 vdi = self.getVDI(uuid)
2795 if vdi:
2796 vdi.ensureUnpaused()
2797 self._delete_running_file(vdi)
2798 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
2799 if vdiOld:
2800 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2801 self.deleteVDI(vdiOld)
2802 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2803 finally:
2804 self.cleanup()
2805 self.unlock()
2806 self.logFilter.logState()
2807 return True
2809 def _doCoalesceLeaf(self, vdi: VDI):
2810 """Actual coalescing of a leaf VDI onto parent. Must be called in an
2811 offline/atomic context"""
2812 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
2813 self._prepareCoalesceLeaf(vdi)
2814 vdi.parent._setHidden(False)
2815 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
2816 vdi.validate(True)
2817 vdi.parent.validate(True)
2818 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
2819 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
2820 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 2820 ↛ 2821line 2820 didn't jump to line 2821, because the condition on line 2820 was never true
2821 Util.log("Leaf-coalesce forced, will not use timeout")
2822 timeout = 0
2823 vdi._coalesceCowImage(timeout)
2824 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
2825 vdi.parent.validate(True)
2826 #vdi._verifyContents(timeout / 2)
2828 # rename
2829 vdiUuid = vdi.uuid
2830 oldName = vdi.fileName
2831 origParentUuid = vdi.parent.uuid
2832 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
2833 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
2834 vdi.parent.rename(vdiUuid)
2835 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
2836 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
2838 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
2839 # garbage
2841 # update the VDI record
2842 if vdi.parent.vdi_type == VdiType.RAW: 2842 ↛ 2843line 2842 didn't jump to line 2843, because the condition on line 2842 was never true
2843 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
2844 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
2845 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
2847 self._updateNode(vdi)
2849 # delete the obsolete leaf & inflate the parent (in that order, to
2850 # minimize free space requirements)
2851 parent = vdi.parent
2852 vdi._setHidden(True)
2853 vdi.parent.children = []
2854 vdi.parent = None
2856 if parent.parent is None:
2857 parent.delConfig(VDI.DB_VDI_PARENT)
2859 extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
2860 freeSpace = self.getFreeSpace()
2861 if freeSpace < extraSpace: 2861 ↛ 2864line 2861 didn't jump to line 2864, because the condition on line 2861 was never true
2862 # don't delete unless we need the space: deletion is time-consuming
2863 # because it requires contacting the slaves, and we're paused here
2864 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2865 self.deleteVDI(vdi)
2866 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2868 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
2869 self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
2871 self.forgetVDI(origParentUuid)
2872 self._finishCoalesceLeaf(parent)
2873 self._updateSlavesOnResize(parent)
2875 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2876 assert(VdiType.isCowImage(parent.vdi_type))
2877 extra = child.getSizePhys() - parent.getSizePhys()
2878 if extra < 0: 2878 ↛ 2879line 2878 didn't jump to line 2879, because the condition on line 2878 was never true
2879 extra = 0
2880 return extra
2882 def _prepareCoalesceLeaf(self, vdi) -> None:
2883 pass
2885 def _updateNode(self, vdi) -> None:
2886 pass
2888 def _finishCoalesceLeaf(self, parent) -> None:
2889 pass
2891 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
2892 pass
2894 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
2895 pass
2897 def _updateSlavesOnResize(self, vdi) -> None:
2898 pass
2900 def _removeStaleVDIs(self, uuidsPresent) -> None:
2901 for uuid in list(self.vdis.keys()):
2902 if not uuid in uuidsPresent:
2903 Util.log("VDI %s disappeared since last scan" % \
2904 self.vdis[uuid])
2905 del self.vdis[uuid]
2907 def _handleInterruptedCoalesceLeaf(self) -> None:
2908 """An interrupted leaf-coalesce operation may leave the COW tree in an
2909 inconsistent state. If the old-leaf VDI is still present, we revert the
2910 operation (in case the original error is persistent); otherwise we must
2911 finish the operation"""
2912 pass
2914 def _buildTree(self, force):
2915 self.vdiTrees = []
2916 for vdi in self.vdis.values():
2917 if vdi.parentUuid:
2918 parent = self.getVDI(vdi.parentUuid)
2919 if not parent:
2920 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
2921 self.vdiTrees.append(vdi)
2922 continue
2923 if force:
2924 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
2925 (vdi.parentUuid, vdi.uuid))
2926 self.vdiTrees.append(vdi)
2927 continue
2928 else:
2929 raise util.SMException("Parent VDI %s of %s not " \
2930 "found" % (vdi.parentUuid, vdi.uuid))
2931 vdi.parent = parent
2932 parent.children.append(vdi)
2933 else:
2934 self.vdiTrees.append(vdi)
2937class FileSR(SR):
2938 TYPE = SR.TYPE_FILE
2939 CACHE_FILE_EXT = ".vhdcache"
2940 # cache cleanup actions
2941 CACHE_ACTION_KEEP = 0
2942 CACHE_ACTION_REMOVE = 1
2943 CACHE_ACTION_REMOVE_IF_INACTIVE = 2
2945 def __init__(self, uuid, xapi, createLock, force):
2946 SR.__init__(self, uuid, xapi, createLock, force)
2947 self.path = "/var/run/sr-mount/%s" % self.uuid
2948 self.journaler = fjournaler.Journaler(self.path)
2950 @override
2951 def scan(self, force=False) -> None:
2952 if not util.pathexists(self.path):
2953 raise util.SMException("directory %s not found!" % self.uuid)
2955 uuidsPresent: List[str] = []
2957 for vdi_type in VDI_COW_TYPES:
2958 scan_result = self._scan(vdi_type, force)
2959 for uuid, image_info in scan_result.items():
2960 vdi = self.getVDI(uuid)
2961 if not vdi:
2962 self.logFilter.logNewVDI(uuid)
2963 vdi = FileVDI(self, uuid, vdi_type)
2964 self.vdis[uuid] = vdi
2965 vdi.load(image_info)
2966 uuidsPresent.extend(scan_result.keys())
2968 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
2969 for rawName in rawList:
2970 uuid = FileVDI.extractUuid(rawName)
2971 uuidsPresent.append(uuid)
2972 vdi = self.getVDI(uuid)
2973 if not vdi:
2974 self.logFilter.logNewVDI(uuid)
2975 vdi = FileVDI(self, uuid, VdiType.RAW)
2976 self.vdis[uuid] = vdi
2977 self._removeStaleVDIs(uuidsPresent)
2978 self._buildTree(force)
2979 self.logFilter.logState()
2980 self._handleInterruptedCoalesceLeaf()
2982 @override
2983 def getFreeSpace(self) -> int:
2984 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2986 @override
2987 def deleteVDIs(self, vdiList) -> None:
2988 rootDeleted = False
2989 for vdi in vdiList:
2990 if not vdi.parent:
2991 rootDeleted = True
2992 break
2993 SR.deleteVDIs(self, vdiList)
2994 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
2995 self.xapi.markCacheSRsDirty()
2997 @override
2998 def cleanupCache(self, maxAge=-1) -> int:
2999 """Clean up IntelliCache cache files. Caches for leaf nodes are
3000 removed when the leaf node no longer exists or its allow-caching
3001 attribute is not set. Caches for parent nodes are removed when the
3002 parent node no longer exists or it hasn't been used in more than
3003 <maxAge> hours.
3004 Return number of caches removed.
3005 """
3006 numRemoved = 0
3007 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
3008 Util.log("Found %d cache files" % len(cacheFiles))
3009 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
3010 for cacheFile in cacheFiles:
3011 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
3012 action = self.CACHE_ACTION_KEEP
3013 rec = self.xapi.getRecordVDI(uuid)
3014 if not rec:
3015 Util.log("Cache %s: VDI doesn't exist" % uuid)
3016 action = self.CACHE_ACTION_REMOVE
3017 elif rec["managed"] and not rec["allow_caching"]:
3018 Util.log("Cache %s: caching disabled" % uuid)
3019 action = self.CACHE_ACTION_REMOVE
3020 elif not rec["managed"] and maxAge >= 0:
3021 lastAccess = datetime.datetime.fromtimestamp( \
3022 os.path.getatime(os.path.join(self.path, cacheFile)))
3023 if lastAccess < cutoff:
3024 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
3025 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
3027 if action == self.CACHE_ACTION_KEEP:
3028 Util.log("Keeping cache %s" % uuid)
3029 continue
3031 lockId = uuid
3032 parentUuid = None
3033 if rec and rec["managed"]:
3034 parentUuid = rec["sm_config"].get("vhd-parent")
3035 if parentUuid:
3036 lockId = parentUuid
3038 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
3039 cacheLock.acquire()
3040 try:
3041 if self._cleanupCache(uuid, action):
3042 numRemoved += 1
3043 finally:
3044 cacheLock.release()
3045 return numRemoved
3047 def _cleanupCache(self, uuid, action):
3048 assert(action != self.CACHE_ACTION_KEEP)
3049 rec = self.xapi.getRecordVDI(uuid)
3050 if rec and rec["allow_caching"]:
3051 Util.log("Cache %s appears to have become valid" % uuid)
3052 return False
3054 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
3055 tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
3056 if tapdisk:
3057 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
3058 Util.log("Cache %s still in use" % uuid)
3059 return False
3060 Util.log("Shutting down tapdisk for %s" % fullPath)
3061 tapdisk.shutdown()
3063 Util.log("Deleting file %s" % fullPath)
3064 os.unlink(fullPath)
3065 return True
3067 def _isCacheFileName(self, name):
3068 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
3069 name.endswith(self.CACHE_FILE_EXT)
3071 def _scan(self, vdi_type, force):
3072 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3073 error = False
3074 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
3075 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
3076 for uuid, vdiInfo in scan_result.items():
3077 if vdiInfo.error:
3078 error = True
3079 break
3080 if not error:
3081 return scan_result
3082 Util.log("Scan error on attempt %d" % i)
3083 if force:
3084 return scan_result
3085 raise util.SMException("Scan error")
3087 @override
3088 def deleteVDI(self, vdi) -> None:
3089 self._checkSlaves(vdi)
3090 SR.deleteVDI(self, vdi)
3092 def _checkSlaves(self, vdi):
3093 onlineHosts = self.xapi.getOnlineHosts()
3094 abortFlag = IPCFlag(self.uuid)
3095 for pbdRecord in self.xapi.getAttachedPBDs():
3096 hostRef = pbdRecord["host"]
3097 if hostRef == self.xapi._hostRef:
3098 continue
3099 if abortFlag.test(FLAG_TYPE_ABORT):
3100 raise AbortException("Aborting due to signal")
3101 try:
3102 self._checkSlave(hostRef, vdi)
3103 except util.CommandException:
3104 if hostRef in onlineHosts:
3105 raise
3107 def _checkSlave(self, hostRef, vdi):
3108 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
3109 Util.log("Checking with slave: %s" % repr(call))
3110 _host = self.xapi.session.xenapi.host
3111 text = _host.call_plugin( * call)
3113 @override
3114 def _handleInterruptedCoalesceLeaf(self) -> None:
3115 entries = self.journaler.getAll(VDI.JRN_LEAF)
3116 for uuid, parentUuid in entries.items():
3117 fileList = os.listdir(self.path)
3118 childName = uuid + VdiTypeExtension.VHD
3119 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
3120 parentName1 = parentUuid + VdiTypeExtension.VHD
3121 parentName2 = parentUuid + VdiTypeExtension.RAW
3122 parentPresent = (parentName1 in fileList or parentName2 in fileList)
3123 if parentPresent or tmpChildName in fileList:
3124 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3125 else:
3126 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3127 self.journaler.remove(VDI.JRN_LEAF, uuid)
3128 vdi = self.getVDI(uuid)
3129 if vdi:
3130 vdi.ensureUnpaused()
3132 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3133 Util.log("*** UNDO LEAF-COALESCE")
3134 parent = self.getVDI(parentUuid)
3135 if not parent:
3136 parent = self.getVDI(childUuid)
3137 if not parent:
3138 raise util.SMException("Neither %s nor %s found" % \
3139 (parentUuid, childUuid))
3140 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3141 parent.rename(parentUuid)
3142 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3144 child = self.getVDI(childUuid)
3145 if not child:
3146 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3147 if not child:
3148 raise util.SMException("Neither %s nor %s found" % \
3149 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3150 Util.log("Renaming child back to %s" % childUuid)
3151 child.rename(childUuid)
3152 Util.log("Updating the VDI record")
3153 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3154 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3155 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3157 if child.isHidden():
3158 child._setHidden(False)
3159 if not parent.isHidden():
3160 parent._setHidden(True)
3161 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3162 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3163 Util.log("*** leaf-coalesce undo successful")
3164 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3165 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3167 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3168 Util.log("*** FINISH LEAF-COALESCE")
3169 vdi = self.getVDI(childUuid)
3170 if not vdi:
3171 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
3172 raise util.SMException("VDI %s not found" % childUuid)
3173 try:
3174 self.forgetVDI(parentUuid)
3175 except XenAPI.Failure:
3176 Util.logException('_finishInterruptedCoalesceLeaf')
3177 pass
3178 self._updateSlavesOnResize(vdi)
3179 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3180 Util.log("*** finished leaf-coalesce successfully")
3183class LVMSR(SR):
3184 TYPE = SR.TYPE_LVHD
3185 SUBTYPES = ["lvhdoiscsi", "lvhdohba"]
3187 def __init__(self, uuid, xapi, createLock, force):
3188 SR.__init__(self, uuid, xapi, createLock, force)
3189 self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
3190 self.path = os.path.join(VG_LOCATION, self.vgName)
3192 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
3193 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
3194 lvm_conf = other_conf.get('lvm-conf') if other_conf else None
3195 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)
3197 self.lvActivator = LVActivator(self.uuid, self.lvmCache)
3198 self.journaler = journaler.Journaler(self.lvmCache)
3200 @override
3201 def deleteVDI(self, vdi) -> None:
3202 if self.lvActivator.get(vdi.uuid, False):
3203 self.lvActivator.deactivate(vdi.uuid, False)
3204 self._checkSlaves(vdi)
3205 SR.deleteVDI(self, vdi)
3207 @override
3208 def forgetVDI(self, vdiUuid) -> None:
3209 SR.forgetVDI(self, vdiUuid)
3210 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
3211 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
3213 @override
3214 def getFreeSpace(self) -> int:
3215 stats = lvutil._getVGstats(self.vgName)
3216 return stats['physical_size'] - stats['physical_utilisation']
3218 @override
3219 def cleanup(self):
3220 if not self.lvActivator.deactivateAll():
3221 Util.log("ERROR deactivating LVs while cleaning up")
3223 @override
3224 def needUpdateBlockInfo(self) -> bool:
3225 for vdi in self.vdis.values():
3226 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3227 continue
3228 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3229 return True
3230 return False
3232 @override
3233 def updateBlockInfo(self) -> None:
3234 numUpdated = 0
3235 for vdi in self.vdis.values():
3236 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3237 continue
3238 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3239 vdi.updateBlockInfo()
3240 numUpdated += 1
3241 if numUpdated:
3242 # deactivate the LVs back sooner rather than later. If we don't
3243 # now, by the time this thread gets to deactivations, another one
3244 # might have leaf-coalesced a node and deleted it, making the child
3245 # inherit the refcount value and preventing the correct decrement
3246 self.cleanup()
3248 @override
3249 def scan(self, force=False) -> None:
3250 vdis = self._scan(force)
3251 for uuid, vdiInfo in vdis.items():
3252 vdi = self.getVDI(uuid)
3253 if not vdi:
3254 self.logFilter.logNewVDI(uuid)
3255 vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
3256 self.vdis[uuid] = vdi
3257 vdi.load(vdiInfo)
3258 self._removeStaleVDIs(vdis.keys())
3259 self._buildTree(force)
3260 self.logFilter.logState()
3261 self._handleInterruptedCoalesceLeaf()
3263 def _scan(self, force):
3264 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3265 error = False
3266 self.lvmCache.refresh()
3267 vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
3268 for uuid, vdiInfo in vdis.items():
3269 if vdiInfo.scanError:
3270 error = True
3271 break
3272 if not error:
3273 return vdis
3274 Util.log("Scan error, retrying (%d)" % i)
3275 if force:
3276 return vdis
3277 raise util.SMException("Scan error")
3279 @override
3280 def _removeStaleVDIs(self, uuidsPresent) -> None:
3281 for uuid in list(self.vdis.keys()):
3282 if not uuid in uuidsPresent:
3283 Util.log("VDI %s disappeared since last scan" % \
3284 self.vdis[uuid])
3285 del self.vdis[uuid]
3286 if self.lvActivator.get(uuid, False):
3287 self.lvActivator.remove(uuid, False)
3289 @override
3290 def _liveLeafCoalesce(self, vdi) -> bool:
3291 """If the parent is raw and the child was resized (virt. size), then
3292 we'll need to resize the parent, which can take a while due to zeroing
3293 out of the extended portion of the LV. Do it before pausing the child
3294 to avoid a protracted downtime"""
3295 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
3296 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3297 vdi.parent._increaseSizeVirt(vdi.sizeVirt)
3299 return SR._liveLeafCoalesce(self, vdi)
3301 @override
3302 def _prepareCoalesceLeaf(self, vdi) -> None:
3303 vdi._activateChain()
3304 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3305 vdi.deflate()
3306 vdi.inflateParentForCoalesce()
3308 @override
3309 def _updateNode(self, vdi) -> None:
3310 # fix the refcounts: the remaining node should inherit the binary
3311 # refcount from the leaf (because if it was online, it should remain
3312 # refcounted as such), but the normal refcount from the parent (because
3313 # this node is really the parent node) - minus 1 if it is online (since
3314 # non-leaf nodes increment their normal counts when they are online and
3315 # we are now a leaf, storing that 1 in the binary refcount).
3316 ns = NS_PREFIX_LVM + self.uuid
3317 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
3318 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
3319 pCnt = pCnt - cBcnt
3320 assert(pCnt >= 0)
3321 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
3323 @override
3324 def _finishCoalesceLeaf(self, parent) -> None:
3325 if not parent.isSnapshot() or parent.isAttachedRW():
3326 parent.inflateFully()
3327 else:
3328 parent.deflate()
3330 @override
3331 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3332 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV
3334 @override
3335 def _handleInterruptedCoalesceLeaf(self) -> None:
3336 entries = self.journaler.getAll(VDI.JRN_LEAF)
3337 for uuid, parentUuid in entries.items():
3338 undo = False
3339 for prefix in LV_PREFIX.values():
3340 parentLV = prefix + parentUuid
3341 undo = self.lvmCache.checkLV(parentLV)
3342 if undo:
3343 break
3345 if not undo:
3346 for prefix in LV_PREFIX.values():
3347 tmpChildLV = prefix + uuid
3348 undo = self.lvmCache.checkLV(tmpChildLV)
3349 if undo:
3350 break
3352 if undo:
3353 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3354 else:
3355 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3356 self.journaler.remove(VDI.JRN_LEAF, uuid)
3357 vdi = self.getVDI(uuid)
3358 if vdi:
3359 vdi.ensureUnpaused()
3361 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3362 Util.log("*** UNDO LEAF-COALESCE")
3363 parent = self.getVDI(parentUuid)
3364 if not parent:
3365 parent = self.getVDI(childUuid)
3366 if not parent:
3367 raise util.SMException("Neither %s nor %s found" % \
3368 (parentUuid, childUuid))
3369 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3370 parent.rename(parentUuid)
3371 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3373 child = self.getVDI(childUuid)
3374 if not child:
3375 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3376 if not child:
3377 raise util.SMException("Neither %s nor %s found" % \
3378 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3379 Util.log("Renaming child back to %s" % childUuid)
3380 child.rename(childUuid)
3381 Util.log("Updating the VDI record")
3382 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3383 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3384 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3386 # refcount (best effort - assume that it had succeeded if the
3387 # second rename succeeded; if not, this adjustment will be wrong,
3388 # leading to a non-deactivation of the LV)
3389 ns = NS_PREFIX_LVM + self.uuid
3390 cCnt, cBcnt = RefCounter.check(child.uuid, ns)
3391 pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
3392 pCnt = pCnt + cBcnt
3393 RefCounter.set(parent.uuid, pCnt, 0, ns)
3394 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
3396 parent.deflate()
3397 child.inflateFully()
3398 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
3399 if child.isHidden():
3400 child._setHidden(False)
3401 if not parent.isHidden():
3402 parent._setHidden(True)
3403 if not parent.lvReadonly:
3404 self.lvmCache.setReadonly(parent.fileName, True)
3405 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3406 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3407 Util.log("*** leaf-coalesce undo successful")
3408 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3409 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3411 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3412 Util.log("*** FINISH LEAF-COALESCE")
3413 vdi = self.getVDI(childUuid)
3414 if not vdi:
3415 raise util.SMException("VDI %s not found" % childUuid)
3416 vdi.inflateFully()
3417 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
3418 try:
3419 self.forgetVDI(parentUuid)
3420 except XenAPI.Failure:
3421 pass
3422 self._updateSlavesOnResize(vdi)
3423 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3424 Util.log("*** finished leaf-coalesce successfully")
3426 def _checkSlaves(self, vdi):
3427 """Confirm with all slaves in the pool that 'vdi' is not in use. We
3428 try to check all slaves, including those that the Agent believes are
3429 offline, but ignore failures for offline hosts. This is to avoid cases
3430 where the Agent thinks a host is offline but the host is up."""
3431 args = {"vgName": self.vgName,
3432 "action1": "deactivateNoRefcount",
3433 "lvName1": vdi.fileName,
3434 "action2": "cleanupLockAndRefcount",
3435 "uuid2": vdi.uuid,
3436 "ns2": NS_PREFIX_LVM + self.uuid}
3437 onlineHosts = self.xapi.getOnlineHosts()
3438 abortFlag = IPCFlag(self.uuid)
3439 for pbdRecord in self.xapi.getAttachedPBDs():
3440 hostRef = pbdRecord["host"]
3441 if hostRef == self.xapi._hostRef:
3442 continue
3443 if abortFlag.test(FLAG_TYPE_ABORT):
3444 raise AbortException("Aborting due to signal")
3445 Util.log("Checking with slave %s (path %s)" % (
3446 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
3447 try:
3448 self.xapi.ensureInactive(hostRef, args)
3449 except XenAPI.Failure:
3450 if hostRef in onlineHosts:
3451 raise
3453 @override
3454 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
3455 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
3456 if not slaves:
3457 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
3458 child)
3459 return
3461 tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid
3462 args = {"vgName": self.vgName,
3463 "action1": "deactivateNoRefcount",
3464 "lvName1": tmpName,
3465 "action2": "deactivateNoRefcount",
3466 "lvName2": child.fileName,
3467 "action3": "refresh",
3468 "lvName3": child.fileName,
3469 "action4": "refresh",
3470 "lvName4": parent.fileName}
3471 for slave in slaves:
3472 Util.log("Updating %s, %s, %s on slave %s" % \
3473 (tmpName, child.fileName, parent.fileName,
3474 self.xapi.getRecordHost(slave)['hostname']))
3475 text = self.xapi.session.xenapi.host.call_plugin( \
3476 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3477 Util.log("call-plugin returned: '%s'" % text)
3479 @override
3480 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
3481 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
3482 if not slaves:
3483 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
3484 return
3486 args = {"vgName": self.vgName,
3487 "action1": "deactivateNoRefcount",
3488 "lvName1": oldNameLV,
3489 "action2": "refresh",
3490 "lvName2": vdi.fileName,
3491 "action3": "cleanupLockAndRefcount",
3492 "uuid3": origParentUuid,
3493 "ns3": NS_PREFIX_LVM + self.uuid}
3494 for slave in slaves:
3495 Util.log("Updating %s to %s on slave %s" % \
3496 (oldNameLV, vdi.fileName,
3497 self.xapi.getRecordHost(slave)['hostname']))
3498 text = self.xapi.session.xenapi.host.call_plugin( \
3499 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3500 Util.log("call-plugin returned: '%s'" % text)
3502 @override
3503 def _updateSlavesOnResize(self, vdi) -> None:
3504 uuids = [x.uuid for x in vdi.getAllLeaves()]
3505 slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
3506 if not slaves:
3507 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
3508 return
3509 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
3510 vdi.fileName, vdi.uuid, slaves)
3513class LinstorSR(SR):
3514 TYPE = SR.TYPE_LINSTOR
3516 def __init__(self, uuid, xapi, createLock, force):
3517 if not LINSTOR_AVAILABLE:
3518 raise util.SMException(
3519 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
3520 )
3522 SR.__init__(self, uuid, xapi, createLock, force)
3523 self.path = LinstorVolumeManager.DEV_ROOT_PATH
3525 class LinstorProxy:
3526 def __init__(self, sr: LinstorSR) -> None:
3527 self.sr = sr
3529 def __getattr__(self, attr: str) -> Any:
3530 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
3531 return getattr(self.sr._linstor, attr)
3533 self._linstor_proxy = LinstorProxy(self)
3534 self._reloadLinstor(journaler_only=True)
3536 @override
3537 def deleteVDI(self, vdi) -> None:
3538 self._checkSlaves(vdi)
3539 SR.deleteVDI(self, vdi)
3541 @override
3542 def getFreeSpace(self) -> int:
3543 return self._linstor.max_volume_size_allowed
3545 @override
3546 def scan(self, force=False) -> None:
3547 all_vdi_info = self._scan(force)
3548 for uuid, vdiInfo in all_vdi_info.items():
3549 # When vdiInfo is None, the VDI is RAW.
3550 vdi = self.getVDI(uuid)
3551 if not vdi:
3552 self.logFilter.logNewVDI(uuid)
3553 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)
3554 self.vdis[uuid] = vdi
3555 if vdiInfo:
3556 vdi.load(vdiInfo)
3557 self._removeStaleVDIs(all_vdi_info.keys())
3558 self._buildTree(force)
3559 self.logFilter.logState()
3560 self._handleInterruptedCoalesceLeaf()
3562 @override
3563 def pauseVDIs(self, vdiList) -> None:
3564 self._linstor.ensure_volume_list_is_not_locked(
3565 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3566 )
3567 return super(LinstorSR, self).pauseVDIs(vdiList)
3569 def _reloadLinstor(self, journaler_only=False):
3570 session = self.xapi.session
3571 host_ref = util.get_this_host_ref(session)
3572 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
3574 pbd = util.find_my_pbd(session, host_ref, sr_ref)
3575 if pbd is None:
3576 raise util.SMException('Failed to find PBD')
3578 dconf = session.xenapi.PBD.get_device_config(pbd)
3579 group_name = dconf['group-name']
3581 controller_uri = get_controller_uri()
3582 self.journaler = LinstorJournaler(
3583 controller_uri, group_name, logger=util.SMlog
3584 )
3586 if journaler_only:
3587 return
3589 self._linstor = LinstorVolumeManager(
3590 controller_uri,
3591 group_name,
3592 repair=True,
3593 logger=util.SMlog
3594 )
3596 def _scan(self, force):
3597 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3598 self._reloadLinstor()
3599 error = False
3600 try:
3601 all_vdi_info = self._load_vdi_info()
3602 for uuid, vdiInfo in all_vdi_info.items():
3603 if vdiInfo and vdiInfo.error:
3604 error = True
3605 break
3606 if not error:
3607 return all_vdi_info
3608 Util.log('Scan error, retrying ({})'.format(i))
3609 except Exception as e:
3610 Util.log('Scan exception, retrying ({}): {}'.format(i, e))
3611 Util.log(traceback.format_exc())
3613 if force:
3614 return all_vdi_info
3615 raise util.SMException('Scan error')
3617 def _load_vdi_info(self):
3618 all_vdi_info = {}
3620 # TODO: Ensure metadata contains the right info.
3622 all_volume_info = self._linstor.get_volumes_with_info()
3623 volumes_metadata = self._linstor.get_volumes_with_metadata()
3624 for vdi_uuid, volume_info in all_volume_info.items():
3625 vdi_type = VdiType.RAW
3626 try:
3627 volume_metadata = volumes_metadata[vdi_uuid]
3628 if not volume_info.name and not list(volume_metadata.items()):
3629 continue # Ignore it, probably deleted.
3631 if vdi_uuid.startswith('DELETED_'):
3632 # Assume it's really a RAW volume of a failed snap without COW header/footer.
3633 # We must remove this VDI now without adding it in the VDI list.
3634 # Otherwise `Relinking` calls and other actions can be launched on it.
3635 # We don't want that...
3636 Util.log('Deleting bad VDI {}'.format(vdi_uuid))
3638 self.lock()
3639 try:
3640 self._linstor.destroy_volume(vdi_uuid)
3641 try:
3642 self.forgetVDI(vdi_uuid)
3643 except:
3644 pass
3645 except Exception as e:
3646 Util.log('Cannot delete bad VDI: {}'.format(e))
3647 finally:
3648 self.unlock()
3649 continue
3651 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
3652 volume_name = self._linstor.get_volume_name(vdi_uuid)
3653 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
3654 # Always RAW!
3655 info = None
3656 elif VdiType.isCowImage(vdi_type):
3657 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid)
3658 else:
3659 # Ensure it's not a COW image...
3660 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type)
3661 try:
3662 info = linstorcowutil.get_info(vdi_uuid)
3663 except:
3664 try:
3665 linstorcowutil.force_repair(
3666 self._linstor.get_device_path(vdi_uuid)
3667 )
3668 info = linstorcowutil.get_info(vdi_uuid)
3669 except:
3670 info = None
3672 except Exception as e:
3673 Util.log(
3674 ' [VDI {}: failed to load VDI info]: {}'
3675 .format(vdi_uuid, e)
3676 )
3677 info = CowImageInfo(vdi_uuid)
3678 info.error = 1
3680 if info:
3681 info.vdiType = vdi_type
3683 all_vdi_info[vdi_uuid] = info
3685 return all_vdi_info
3687 @override
3688 def _prepareCoalesceLeaf(self, vdi) -> None:
3689 vdi._activateChain()
3690 vdi.deflate()
3691 vdi._inflateParentForCoalesce()
3693 @override
3694 def _finishCoalesceLeaf(self, parent) -> None:
3695 if not parent.isSnapshot() or parent.isAttachedRW():
3696 parent.inflateFully()
3697 else:
3698 parent.deflate()
3700 @override
3701 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3702 return LinstorCowUtil(
3703 self.xapi.session, self._linstor, parent.vdi_type
3704 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()
3706 def _hasValidDevicePath(self, uuid):
3707 try:
3708 self._linstor.get_device_path(uuid)
3709 except Exception:
3710 # TODO: Maybe log exception.
3711 return False
3712 return True
3714 @override
3715 def _liveLeafCoalesce(self, vdi) -> bool:
3716 self.lock()
3717 try:
3718 self._linstor.ensure_volume_is_not_locked(
3719 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3720 )
3721 return super(LinstorSR, self)._liveLeafCoalesce(vdi)
3722 finally:
3723 self.unlock()
3725 @override
3726 def _handleInterruptedCoalesceLeaf(self) -> None:
3727 entries = self.journaler.get_all(VDI.JRN_LEAF)
3728 for uuid, parentUuid in entries.items():
3729 if self._hasValidDevicePath(parentUuid) or \
3730 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
3731 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3732 else:
3733 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3734 self.journaler.remove(VDI.JRN_LEAF, uuid)
3735 vdi = self.getVDI(uuid)
3736 if vdi:
3737 vdi.ensureUnpaused()
3739 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3740 Util.log('*** UNDO LEAF-COALESCE')
3741 parent = self.getVDI(parentUuid)
3742 if not parent:
3743 parent = self.getVDI(childUuid)
3744 if not parent:
3745 raise util.SMException(
3746 'Neither {} nor {} found'.format(parentUuid, childUuid)
3747 )
3748 Util.log(
3749 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
3750 )
3751 parent.rename(parentUuid)
3753 child = self.getVDI(childUuid)
3754 if not child:
3755 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3756 if not child:
3757 raise util.SMException(
3758 'Neither {} nor {} found'.format(
3759 childUuid, self.TMP_RENAME_PREFIX + childUuid
3760 )
3761 )
3762 Util.log('Renaming child back to {}'.format(childUuid))
3763 child.rename(childUuid)
3764 Util.log('Updating the VDI record')
3765 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3766 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3768 # TODO: Maybe deflate here.
3770 if child.isHidden():
3771 child._setHidden(False)
3772 if not parent.isHidden():
3773 parent._setHidden(True)
3774 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3775 Util.log('*** leaf-coalesce undo successful')
3777 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3778 Util.log('*** FINISH LEAF-COALESCE')
3779 vdi = self.getVDI(childUuid)
3780 if not vdi:
3781 raise util.SMException('VDI {} not found'.format(childUuid))
3782 # TODO: Maybe inflate.
3783 try:
3784 self.forgetVDI(parentUuid)
3785 except XenAPI.Failure:
3786 pass
3787 self._updateSlavesOnResize(vdi)
3788 Util.log('*** finished leaf-coalesce successfully')
3790 def _checkSlaves(self, vdi):
3791 try:
3792 all_openers = self._linstor.get_volume_openers(vdi.uuid)
3793 for openers in all_openers.values():
3794 for opener in openers.values():
3795 if opener['process-name'] != 'tapdisk':
3796 raise util.SMException(
3797 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3798 )
3799 except LinstorVolumeManagerError as e:
3800 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
3801 raise
3804################################################################################
3805#
3806# Helpers
3807#
3808def daemonize():
3809 pid = os.fork()
3810 if pid:
3811 os.waitpid(pid, 0)
3812 Util.log("New PID [%d]" % pid)
3813 return False
3814 os.chdir("/")
3815 os.setsid()
3816 pid = os.fork()
3817 if pid:
3818 Util.log("Will finish as PID [%d]" % pid)
3819 os._exit(0)
3820 for fd in [0, 1, 2]:
3821 try:
3822 os.close(fd)
3823 except OSError:
3824 pass
3825 # we need to fill those special fd numbers or pread won't work
3826 sys.stdin = open("/dev/null", 'r')
3827 sys.stderr = open("/dev/null", 'w')
3828 sys.stdout = open("/dev/null", 'w')
3829 # As we're a new process we need to clear the lock objects
3830 lock.Lock.clearAll()
3831 return True
3834def normalizeType(type):
3835 if type in LVMSR.SUBTYPES:
3836 type = SR.TYPE_LVHD
3837 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]:
3838 # temporary while LVHD is symlinked as LVM
3839 type = SR.TYPE_LVHD
3840 if type in [
3841 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
3842 "moosefs", "xfs", "zfs", "largeblock"
3843 ]:
3844 type = SR.TYPE_FILE
3845 if type in ["linstor"]:
3846 type = SR.TYPE_LINSTOR
3847 if type not in SR.TYPES:
3848 raise util.SMException("Unsupported SR type: %s" % type)
3849 return type
3851GCPAUSE_DEFAULT_SLEEP = 5 * 60
3854def _gc_init_file(sr_uuid):
3855 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')
3858def _create_init_file(sr_uuid):
3859 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
3860 with open(os.path.join(_gc_init_file(sr_uuid)), 'w+') as f:
3861 f.write('1')
3864def _gcLoopPause(sr, dryRun=False, immediate=False):
3865 if immediate:
3866 return
3868 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
3869 # point will just return. Otherwise, fall back on an abortable sleep.
3871 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
3873 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3873 ↛ exitline 3873 didn't jump to the function exit
3874 lambda *args: None)
3875 elif os.path.exists(_gc_init_file(sr.uuid)):
3876 def abortTest():
3877 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
3879 # If time.sleep hangs we are in deep trouble, however for
3880 # completeness we set the timeout of the abort thread to
3881 # 110% of GCPAUSE_DEFAULT_SLEEP.
3882 Util.log("GC active, about to go quiet")
3883 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3883 ↛ exitline 3883 didn't run the lambda on line 3883
3884 None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
3885 GCPAUSE_DEFAULT_SLEEP * 1.1)
3886 Util.log("GC active, quiet period ended")
3889def _gcLoop(sr, dryRun=False, immediate=False):
3890 if not lockGCActive.acquireNoblock(): 3890 ↛ 3891line 3890 didn't jump to line 3891, because the condition on line 3890 was never true
3891 Util.log("Another GC instance already active, exiting")
3892 return
3894 # Check we're still attached after acquiring locks
3895 if not sr.xapi.isPluggedHere():
3896 Util.log("SR no longer attached, exiting")
3897 return
3899 # Clean up Intellicache files
3900 sr.cleanupCache()
3902 # Track how many we do
3903 coalesced = 0
3904 task_status = "success"
3905 try:
3906 # Check if any work needs to be done
3907 if not sr.xapi.isPluggedHere(): 3907 ↛ 3908line 3907 didn't jump to line 3908, because the condition on line 3907 was never true
3908 Util.log("SR no longer attached, exiting")
3909 return
3910 sr.scanLocked()
3911 if not sr.hasWork():
3912 Util.log("No work, exiting")
3913 return
3914 sr.xapi.create_task(
3915 "Garbage Collection",
3916 "Garbage collection for SR %s" % sr.uuid)
3917 _gcLoopPause(sr, dryRun, immediate=immediate)
3918 while True:
3919 if SIGTERM:
3920 Util.log("Term requested")
3921 return
3923 if not sr.xapi.isPluggedHere(): 3923 ↛ 3924line 3923 didn't jump to line 3924, because the condition on line 3923 was never true
3924 Util.log("SR no longer attached, exiting")
3925 break
3926 sr.scanLocked()
3927 if not sr.hasWork():
3928 Util.log("No work, exiting")
3929 break
3931 if not lockGCRunning.acquireNoblock(): 3931 ↛ 3932line 3931 didn't jump to line 3932, because the condition on line 3931 was never true
3932 Util.log("Unable to acquire GC running lock.")
3933 return
3934 try:
3935 if not sr.gcEnabled(): 3935 ↛ 3936line 3935 didn't jump to line 3936, because the condition on line 3935 was never true
3936 break
3938 sr.xapi.update_task_progress("done", coalesced)
3940 sr.cleanupCoalesceJournals()
3941 # Create the init file here in case startup is waiting on it
3942 _create_init_file(sr.uuid)
3943 sr.scanLocked()
3944 sr.updateBlockInfo()
3946 howmany = len(sr.findGarbage())
3947 if howmany > 0:
3948 Util.log("Found %d orphaned vdis" % howmany)
3949 sr.lock()
3950 try:
3951 sr.garbageCollect(dryRun)
3952 finally:
3953 sr.unlock()
3954 sr.xapi.srUpdate()
3956 candidate = sr.findCoalesceable()
3957 if candidate:
3958 util.fistpoint.activate(
3959 "LVHDRT_finding_a_suitable_pair", sr.uuid)
3960 sr.coalesce(candidate, dryRun)
3961 sr.xapi.srUpdate()
3962 coalesced += 1
3963 continue
3965 candidate = sr.findLeafCoalesceable()
3966 if candidate: 3966 ↛ 3973line 3966 didn't jump to line 3973, because the condition on line 3966 was never false
3967 sr.coalesceLeaf(candidate, dryRun)
3968 sr.xapi.srUpdate()
3969 coalesced += 1
3970 continue
3972 finally:
3973 lockGCRunning.release() 3973 ↛ 3978line 3973 didn't jump to line 3978, because the break on line 3936 wasn't executed
3974 except:
3975 task_status = "failure"
3976 raise
3977 finally:
3978 sr.xapi.set_task_status(task_status)
3979 Util.log("GC process exiting, no work left")
3980 _create_init_file(sr.uuid)
3981 lockGCActive.release()
3984def _gc(session, srUuid, dryRun=False, immediate=False):
3985 init(srUuid)
3986 sr = SR.getInstance(srUuid, session)
3987 if not sr.gcEnabled(False): 3987 ↛ 3988line 3987 didn't jump to line 3988, because the condition on line 3987 was never true
3988 return
3990 try:
3991 _gcLoop(sr, dryRun, immediate=immediate)
3992 finally:
3993 sr.check_no_space_candidates()
3994 sr.cleanup()
3995 sr.logFilter.logState()
3996 del sr.xapi
3999def _abort(srUuid, soft=False):
4000 """Aborts an GC/coalesce.
4002 srUuid: the UUID of the SR whose GC/coalesce must be aborted
4003 soft: If set to True and there is a pending abort signal, the function
4004 doesn't do anything. If set to False, a new abort signal is issued.
4006 returns: If soft is set to False, we return True holding lockGCActive. If
4007 soft is set to False and an abort signal is pending, we return False
4008 without holding lockGCActive. An exception is raised in case of error."""
4009 Util.log("=== SR %s: abort ===" % (srUuid))
4010 init(srUuid)
4011 if not lockGCActive.acquireNoblock():
4012 gotLock = False
4013 Util.log("Aborting currently-running instance (SR %s)" % srUuid)
4014 abortFlag = IPCFlag(srUuid)
4015 if not abortFlag.set(FLAG_TYPE_ABORT, soft):
4016 return False
4017 for i in range(SR.LOCK_RETRY_ATTEMPTS):
4018 gotLock = lockGCActive.acquireNoblock()
4019 if gotLock:
4020 break
4021 time.sleep(SR.LOCK_RETRY_INTERVAL)
4022 abortFlag.clear(FLAG_TYPE_ABORT)
4023 if not gotLock:
4024 raise util.CommandException(code=errno.ETIMEDOUT,
4025 reason="SR %s: error aborting existing process" % srUuid)
4026 return True
4029def init(srUuid):
4030 global lockGCRunning
4031 if not lockGCRunning: 4031 ↛ 4032line 4031 didn't jump to line 4032, because the condition on line 4031 was never true
4032 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
4033 global lockGCActive
4034 if not lockGCActive: 4034 ↛ 4035line 4034 didn't jump to line 4035, because the condition on line 4034 was never true
4035 lockGCActive = LockActive(srUuid)
4038class LockActive:
4039 """
4040 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
4041 if another process holds the SR lock.
4042 """
4043 def __init__(self, srUuid):
4044 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
4045 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)
4047 def acquireNoblock(self):
4048 self._srLock.acquire()
4050 try:
4051 return self._lock.acquireNoblock()
4052 finally:
4053 self._srLock.release()
4055 def release(self):
4056 self._lock.release()
4059def usage():
4060 output = """Garbage collect and/or coalesce COW images in a COW-based SR
4062Parameters:
4063 -u --uuid UUID SR UUID
4064 and one of:
4065 -g --gc garbage collect, coalesce, and repeat while there is work
4066 -G --gc_force garbage collect once, aborting any current operations
4067 -c --cache-clean <max_age> clean up IntelliCache cache files older than
4068 max_age hours
4069 -a --abort abort any currently running operation (GC or coalesce)
4070 -q --query query the current state (GC'ing, coalescing or not running)
4071 -x --disable disable GC/coalesce (will be in effect until you exit)
4072 -t --debug see Debug below
4074Options:
4075 -b --background run in background (return immediately) (valid for -g only)
4076 -f --force continue in the presence of COW images with errors (when doing
4077 GC, this might cause removal of any such images) (only valid
4078 for -G) (DANGEROUS)
4080Debug:
4081 The --debug parameter enables manipulation of LVHD VDIs for debugging
4082 purposes. ** NEVER USE IT ON A LIVE VM **
4083 The following parameters are required:
4084 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
4085 "deflate".
4086 -v --vdi_uuid VDI UUID
4087 """
4088 #-d --dry-run don't actually perform any SR-modifying operations
4089 print(output)
4090 Util.log("(Invalid usage)")
4091 sys.exit(1)
4094##############################################################################
4095#
4096# API
4097#
4098def abort(srUuid, soft=False):
4099 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
4100 """
4101 if _abort(srUuid, soft):
4102 Util.log("abort: releasing the process lock")
4103 lockGCActive.release()
4104 return True
4105 else:
4106 return False
4109def gc(session, srUuid, inBackground, dryRun=False):
4110 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
4111 immediately if inBackground=True.
4113 The following algorithm is used:
4114 1. If we are already GC'ing in this SR, return
4115 2. If we are already coalescing a VDI pair:
4116 a. Scan the SR and determine if the VDI pair is GC'able
4117 b. If the pair is not GC'able, return
4118 c. If the pair is GC'able, abort coalesce
4119 3. Scan the SR
4120 4. If there is nothing to collect, nor to coalesce, return
4121 5. If there is something to collect, GC all, then goto 3
4122 6. If there is something to coalesce, coalesce one pair, then goto 3
4123 """
4124 Util.log("=== SR %s: gc ===" % srUuid)
4126 signal.signal(signal.SIGTERM, receiveSignal)
4128 if inBackground:
4129 if daemonize(): 4129 ↛ exitline 4129 didn't return from function 'gc', because the condition on line 4129 was never false
4130 # we are now running in the background. Catch & log any errors
4131 # because there is no other way to propagate them back at this
4132 # point
4134 try:
4135 _gc(None, srUuid, dryRun)
4136 except AbortException:
4137 Util.log("Aborted")
4138 except Exception:
4139 Util.logException("gc")
4140 Util.log("* * * * * SR %s: ERROR\n" % srUuid)
4141 os._exit(0)
4142 else:
4143 _gc(session, srUuid, dryRun, immediate=True)
4146def start_gc(session, sr_uuid):
4147 """
4148 This function is used to try to start a backgrounded GC session by forking
4149 the current process. If using the systemd version, call start_gc_service() instead.
4150 """
4151 # don't bother if an instance already running (this is just an
4152 # optimization to reduce the overhead of forking a new process if we
4153 # don't have to, but the process will check the lock anyways)
4154 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
4155 if not lockRunning.acquireNoblock():
4156 if should_preempt(session, sr_uuid):
4157 util.SMlog("Aborting currently-running coalesce of garbage VDI")
4158 try:
4159 if not abort(sr_uuid, soft=True):
4160 util.SMlog("The GC has already been scheduled to re-start")
4161 except util.CommandException as e:
4162 if e.code != errno.ETIMEDOUT:
4163 raise
4164 util.SMlog('failed to abort the GC')
4165 else:
4166 util.SMlog("A GC instance already running, not kicking")
4167 return
4168 else:
4169 lockRunning.release()
4171 util.SMlog(f"Starting GC file is {__file__}")
4172 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
4173 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4175def start_gc_service(sr_uuid, wait=False):
4176 """
4177 This starts the templated systemd service which runs GC on the given SR UUID.
4178 If the service was already started, this is a no-op.
4180 Because the service is a one-shot with RemainAfterExit=no, when called with
4181 wait=True this will run the service synchronously and will not return until the
4182 run has finished. This is used to force a run of the GC instead of just kicking it
4183 in the background.
4184 """
4185 sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
4186 util.SMlog(f"Kicking SMGC@{sr_uuid}...")
4187 cmd=[ "/usr/bin/systemctl", "--quiet" ]
4188 if not wait: 4188 ↛ 4190line 4188 didn't jump to line 4190, because the condition on line 4188 was never false
4189 cmd.append("--no-block")
4190 cmd += ["start", f"SMGC@{sr_uuid_esc}"]
4191 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4194def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
4195 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
4196 the SR lock is held.
4197 The following algorithm is used:
4198 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
4199 2. Scan the SR
4200 3. GC
4201 4. return
4202 """
4203 Util.log("=== SR %s: gc_force ===" % srUuid)
4204 init(srUuid)
4205 sr = SR.getInstance(srUuid, session, lockSR, True)
4206 if not lockGCActive.acquireNoblock():
4207 abort(srUuid)
4208 else:
4209 Util.log("Nothing was running, clear to proceed")
4211 if force:
4212 Util.log("FORCED: will continue even if there are COW image errors")
4213 sr.scanLocked(force)
4214 sr.cleanupCoalesceJournals()
4216 try:
4217 sr.cleanupCache()
4218 sr.garbageCollect(dryRun)
4219 finally:
4220 sr.cleanup()
4221 sr.logFilter.logState()
4222 lockGCActive.release()
4225def get_state(srUuid):
4226 """Return whether GC/coalesce is currently running or not. This asks systemd for
4227 the state of the templated SMGC service and will return True if it is "activating"
4228 or "running" (for completeness, as in practice it will never achieve the latter state)
4229 """
4230 sr_uuid_esc = srUuid.replace("-", "\\x2d")
4231 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
4232 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4233 state = result.stdout.decode('utf-8').rstrip()
4234 if state == "activating" or state == "running":
4235 return True
4236 return False
4239def should_preempt(session, srUuid):
4240 sr = SR.getInstance(srUuid, session)
4241 entries = sr.journaler.getAll(VDI.JRN_COALESCE)
4242 if len(entries) == 0:
4243 return False
4244 elif len(entries) > 1:
4245 raise util.SMException("More than one coalesce entry: " + str(entries))
4246 sr.scanLocked()
4247 coalescedUuid = entries.popitem()[0]
4248 garbage = sr.findGarbage()
4249 for vdi in garbage:
4250 if vdi.uuid == coalescedUuid:
4251 return True
4252 return False
4255def get_coalesceable_leaves(session, srUuid, vdiUuids):
4256 coalesceable = []
4257 sr = SR.getInstance(srUuid, session)
4258 sr.scanLocked()
4259 for uuid in vdiUuids:
4260 vdi = sr.getVDI(uuid)
4261 if not vdi:
4262 raise util.SMException("VDI %s not found" % uuid)
4263 if vdi.isLeafCoalesceable():
4264 coalesceable.append(uuid)
4265 return coalesceable
4268def cache_cleanup(session, srUuid, maxAge):
4269 sr = SR.getInstance(srUuid, session)
4270 return sr.cleanupCache(maxAge)
4273def debug(sr_uuid, cmd, vdi_uuid):
4274 Util.log("Debug command: %s" % cmd)
4275 sr = SR.getInstance(sr_uuid, None)
4276 if not isinstance(sr, LVMSR):
4277 print("Error: not an LVHD SR")
4278 return
4279 sr.scanLocked()
4280 vdi = sr.getVDI(vdi_uuid)
4281 if not vdi:
4282 print("Error: VDI %s not found")
4283 return
4284 print("Running %s on SR %s" % (cmd, sr))
4285 print("VDI before: %s" % vdi)
4286 if cmd == "activate":
4287 vdi._activate()
4288 print("VDI file: %s" % vdi.path)
4289 if cmd == "deactivate":
4290 ns = NS_PREFIX_LVM + sr.uuid
4291 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
4292 if cmd == "inflate":
4293 vdi.inflateFully()
4294 sr.cleanup()
4295 if cmd == "deflate":
4296 vdi.deflate()
4297 sr.cleanup()
4298 sr.scanLocked()
4299 print("VDI after: %s" % vdi)
4302def abort_optional_reenable(uuid):
4303 print("Disabling GC/coalesce for %s" % uuid)
4304 ret = _abort(uuid)
4305 input("Press enter to re-enable...")
4306 print("GC/coalesce re-enabled")
4307 lockGCRunning.release()
4308 if ret:
4309 lockGCActive.release()
4312##############################################################################
4313#
4314# CLI
4315#
4316def main():
4317 action = ""
4318 maxAge = 0
4319 uuid = ""
4320 background = False
4321 force = False
4322 dryRun = False
4323 debug_cmd = ""
4324 vdi_uuid = ""
4325 shortArgs = "gGc:aqxu:bfdt:v:"
4326 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
4327 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
4329 try:
4330 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
4331 except getopt.GetoptError:
4332 usage()
4333 for o, a in opts:
4334 if o in ("-g", "--gc"):
4335 action = "gc"
4336 if o in ("-G", "--gc_force"):
4337 action = "gc_force"
4338 if o in ("-c", "--clean_cache"):
4339 action = "clean_cache"
4340 maxAge = int(a)
4341 if o in ("-a", "--abort"):
4342 action = "abort"
4343 if o in ("-q", "--query"):
4344 action = "query"
4345 if o in ("-x", "--disable"):
4346 action = "disable"
4347 if o in ("-u", "--uuid"):
4348 uuid = a
4349 if o in ("-b", "--background"):
4350 background = True
4351 if o in ("-f", "--force"):
4352 force = True
4353 if o in ("-d", "--dry-run"):
4354 Util.log("Dry run mode")
4355 dryRun = True
4356 if o in ("-t", "--debug"):
4357 action = "debug"
4358 debug_cmd = a
4359 if o in ("-v", "--vdi_uuid"):
4360 vdi_uuid = a
4362 if not action or not uuid:
4363 usage()
4364 if action == "debug" and not (debug_cmd and vdi_uuid) or \
4365 action != "debug" and (debug_cmd or vdi_uuid):
4366 usage()
4368 if action != "query" and action != "debug":
4369 print("All output goes to log")
4371 if action == "gc":
4372 gc(None, uuid, background, dryRun)
4373 elif action == "gc_force":
4374 gc_force(None, uuid, force, dryRun, True)
4375 elif action == "clean_cache":
4376 cache_cleanup(None, uuid, maxAge)
4377 elif action == "abort":
4378 abort(uuid)
4379 elif action == "query":
4380 print("Currently running: %s" % get_state(uuid))
4381 elif action == "disable":
4382 abort_optional_reenable(uuid)
4383 elif action == "debug":
4384 debug(uuid, debug_cmd, vdi_uuid)
4387if __name__ == '__main__': 4387 ↛ 4388line 4387 didn't jump to line 4388, because the condition on line 4387 was never true
4388 main()