Coverage for drivers/cleanup.py: 34%
1#!/usr/bin/python3
2#
3# Copyright (C) Citrix Systems Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; version 2.1 only.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Script to coalesce and garbage collect COW-based SRs in the background
19#
21from sm_typing import Any, Optional, List, override
23import os
24import os.path
25import sys
26import time
27import signal
28import subprocess
29import getopt
30import datetime
31import traceback
32import base64
33import zlib
34import errno
35import stat
37import XenAPI # pylint: disable=import-error
38import util
39import lvutil
40import lvmcache
41import journaler
42import fjournaler
43import lock
44import blktap2
45import xs_errors
46from refcounter import RefCounter
47from ipc import IPCFlag
48from lvmanager import LVActivator
49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
50from functools import reduce
51from time import monotonic as _time
53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
54from cowutil import CowImageInfo, CowUtil, getCowUtil
55from lvmcowutil import LV_PREFIX, LvmCowUtil
56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION
58try:
59 from linstorcowutil import LinstorCowUtil
60 from linstorjournaler import LinstorJournaler
61 from linstorvolumemanager import get_controller_uri
62 from linstorvolumemanager import LinstorVolumeManager
63 from linstorvolumemanager import LinstorVolumeManagerError
64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
66 LINSTOR_AVAILABLE = True
67except ImportError:
68 LINSTOR_AVAILABLE = False
70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
71# possible due to lvhd_stop_using_() not working correctly. However, we leave
72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
73# record for use by the offline tool (which makes the operation safe by pausing
74# the VM first)
75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True
77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce
79# process "lock", used simply as an indicator that a process already exists
80# that is doing GC/coalesce on this SR (such a process holds the lock, and we
81# check for the fact by trying the lock).
82lockGCRunning = None
84# process "lock" to indicate that the GC process has been activated but may not
85# yet be running, stops a second process from being started.
86LOCK_TYPE_GC_ACTIVE = "gc_active"
87lockGCActive = None
89# Default coalesce error rate limit, in messages per minute. A zero value
90# disables throttling, and a negative value disables error reporting.
91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
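# (Editorial note, not part of the original file.) With the default rate of
# 1.0 / 60 messages per minute, _reportCoalesceError() below turns this into a
# minimum gap of (1.0 / rate) * 60 = 3600 seconds between XenCenter messages,
# i.e. at most one coalesce-error message per hour.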
93COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
95VAR_RUN = "/var/run/"
96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"
98N_RUNNING_AVERAGE = 10
100NON_PERSISTENT_DIR = '/run/nonpersistent/sm'
102# Signal Handler
103SIGTERM = False
106class AbortException(util.SMException):
107 pass
110def receiveSignal(signalNumber, frame):
111 global SIGTERM
113 util.SMlog("GC: received SIGTERM")
114 SIGTERM = True
115 return
118################################################################################
119#
120# Util
121#
122class Util:
123 RET_RC = 1
124 RET_STDOUT = 2
125 RET_STDERR = 4
127 UUID_LEN = 36
129 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}
131 @staticmethod
132 def log(text) -> None:
133 util.SMlog(text, ident="SMGC")
135 @staticmethod
136 def logException(tag):
137 info = sys.exc_info()
138 if info[0] == SystemExit:  # 138 ↛ 140: the condition on line 138 was never true
139 # this should not be happening when catching "Exception", but it is
140 sys.exit(0)
141 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
142 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
143 Util.log(" ***********************")
144 Util.log(" * E X C E P T I O N *")
145 Util.log(" ***********************")
146 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
147 Util.log(tb)
148 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
150 @staticmethod
151 def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
152 "Execute a subprocess, then return its return code, stdout, stderr"
153 proc = subprocess.Popen(args,
154 stdin=subprocess.PIPE, \
155 stdout=subprocess.PIPE, \
156 stderr=subprocess.PIPE, \
157 shell=True, \
158 close_fds=True)
159 (stdout, stderr) = proc.communicate(inputtext)
160 stdout = str(stdout)
161 stderr = str(stderr)
162 rc = proc.returncode
163 if log:
164 Util.log("`%s`: %s" % (args, rc))
165 if not isinstance(expectedRC, list):
166 expectedRC = [expectedRC]
167 if rc not in expectedRC:
168 reason = stderr.strip()
169 if stdout.strip():
170 reason = "%s (stdout: %s)" % (reason, stdout.strip())
171 Util.log("Failed: %s" % reason)
172 raise util.CommandException(rc, args, reason)
174 if ret == Util.RET_RC:
175 return rc
176 if ret == Util.RET_STDERR:
177 return stderr
178 return stdout
180 @staticmethod
181 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
182 """execute func in a separate thread and kill it if abortTest signals
183 so"""
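# (Editorial summary, not part of the original file.) Parent and forked child
# communicate through an IPCFlag in namespace 'ns': the child runs func(),
# sets the "success" or "failure" flag and calls os._exit(0); the parent polls
# the flags every pollInterval seconds and, on an abort signal or timeout,
# kills the child's process group (the child called os.setpgrp()) and finally
# reaps it with a bounded os.waitpid() loop.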
184 abortSignaled = abortTest() # check now before we clear resultFlag
185 resultFlag = IPCFlag(ns)
186 resultFlag.clearAll()
187 pid = os.fork()
188 if pid:
189 startTime = _time()
190 try:
191 while True:
192 if resultFlag.test("success"):
193 Util.log(" Child process completed successfully")
194 resultFlag.clear("success")
195 return
196 if resultFlag.test("failure"):
197 resultFlag.clear("failure")
198 raise util.SMException("Child process exited with error")
199 if abortTest() or abortSignaled or SIGTERM:
200 os.killpg(pid, signal.SIGKILL)
201 raise AbortException("Aborting due to signal")
202 if timeOut and _time() - startTime > timeOut:
203 os.killpg(pid, signal.SIGKILL)
204 resultFlag.clearAll()
205 raise util.SMException("Timed out")
206 time.sleep(pollInterval)
207 finally:
208 wait_pid = 0
209 rc = -1
210 count = 0
211 while wait_pid == 0 and count < 10:
212 wait_pid, rc = os.waitpid(pid, os.WNOHANG)
213 if wait_pid == 0:
214 time.sleep(2)
215 count += 1
217 if wait_pid == 0:
218 Util.log("runAbortable: wait for process completion timed out")
219 else:
220 os.setpgrp()
221 try:
222 if func() == ret:
223 resultFlag.set("success")
224 else:
225 resultFlag.set("failure")
226 except Exception as e:
227 Util.log("Child process failed with: (%s)" % e)
228 resultFlag.set("failure")
229 Util.logException("This exception has occurred")
230 os._exit(0)
232 @staticmethod
233 def num2str(number):
234 for prefix in ("G", "M", "K"):  # 234 ↛ 237: the loop on line 234 didn't complete
235 if number >= Util.PREFIX[prefix]:
236 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
237 return "%s" % number
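# Illustrative examples for num2str (editorial, not part of the original file):
#   Util.num2str(3 * 1024 * 1024)  -> "3.000M"
#   Util.num2str(512)              -> "512"    (no prefix applies)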
239 @staticmethod
240 def numBits(val):
241 count = 0
242 while val:
243 count += val & 1
244 val = val >> 1
245 return count
247 @staticmethod
248 def countBits(bitmap1, bitmap2):
249 """return bit count in the bitmap produced by ORing the two bitmaps"""
250 len1 = len(bitmap1)
251 len2 = len(bitmap2)
252 lenLong = len1
253 lenShort = len2
254 bitmapLong = bitmap1
255 if len2 > len1:
256 lenLong = len2
257 lenShort = len1
258 bitmapLong = bitmap2
260 count = 0
261 for i in range(lenShort):
262 val = bitmap1[i] | bitmap2[i]
263 count += Util.numBits(val)
265 for i in range(lenShort, lenLong):
266 val = bitmapLong[i]
267 count += Util.numBits(val)
268 return count
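# Editorial worked example for countBits (not part of the original file):
#   bitmap1 = b"\x01", bitmap2 = b"\x03\x80"
#   overlapping byte: 0x01 | 0x03 = 0x03 -> 2 bits
#   trailing byte of the longer bitmap: 0x80 -> 1 bit
#   countBits(bitmap1, bitmap2) == 3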
270 @staticmethod
271 def getThisScript():
272 thisScript = util.get_real_path(__file__)
273 if thisScript.endswith(".pyc"):
274 thisScript = thisScript[:-1]
275 return thisScript
278################################################################################
279#
280# XAPI
281#
282class XAPI:
283 USER = "root"
284 PLUGIN_ON_SLAVE = "on-slave"
286 CONFIG_SM = 0
287 CONFIG_OTHER = 1
288 CONFIG_ON_BOOT = 2
289 CONFIG_ALLOW_CACHING = 3
291 CONFIG_NAME = {
292 CONFIG_SM: "sm-config",
293 CONFIG_OTHER: "other-config",
294 CONFIG_ON_BOOT: "on-boot",
295 CONFIG_ALLOW_CACHING: "allow_caching"
296 }
298 class LookupError(util.SMException):
299 pass
301 @staticmethod
302 def getSession():
303 session = XenAPI.xapi_local()
304 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
305 return session
307 def __init__(self, session, srUuid):
308 self.sessionPrivate = False
309 self.session = session
310 if self.session is None:
311 self.session = self.getSession()
312 self.sessionPrivate = True
313 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
314 self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
315 self.hostUuid = util.get_this_host()
316 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
317 self.task = None
318 self.task_progress = {"coalescable": 0, "done": 0}
320 def __del__(self):
321 if self.sessionPrivate:
322 self.session.xenapi.session.logout()
324 @property
325 def srRef(self):
326 return self._srRef
328 def isPluggedHere(self):
329 pbds = self.getAttachedPBDs()
330 for pbdRec in pbds:
331 if pbdRec["host"] == self._hostRef:
332 return True
333 return False
335 def poolOK(self):
336 host_recs = self.session.xenapi.host.get_all_records()
337 for host_ref, host_rec in host_recs.items():
338 if not host_rec["enabled"]:
339 Util.log("Host %s not enabled" % host_rec["uuid"])
340 return False
341 return True
343 def isMaster(self):
344 if self.srRecord["shared"]:
345 pool = list(self.session.xenapi.pool.get_all_records().values())[0]
346 return pool["master"] == self._hostRef
347 else:
348 pbds = self.getAttachedPBDs()
349 if len(pbds) < 1:
350 raise util.SMException("Local SR not attached")
351 elif len(pbds) > 1:
352 raise util.SMException("Local SR multiply attached")
353 return pbds[0]["host"] == self._hostRef
355 def getAttachedPBDs(self):
356 """Return PBD records for all PBDs of this SR that are currently
357 attached"""
358 attachedPBDs = []
359 pbds = self.session.xenapi.PBD.get_all_records()
360 for pbdRec in pbds.values():
361 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
362 attachedPBDs.append(pbdRec)
363 return attachedPBDs
365 def getOnlineHosts(self):
366 return util.get_online_hosts(self.session)
368 def ensureInactive(self, hostRef, args):
369 text = self.session.xenapi.host.call_plugin( \
370 hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
371 Util.log("call-plugin returned: '%s'" % text)
373 def getRecordHost(self, hostRef):
374 return self.session.xenapi.host.get_record(hostRef)
376 def _getRefVDI(self, uuid):
377 return self.session.xenapi.VDI.get_by_uuid(uuid)
379 def getRefVDI(self, vdi):
380 return self._getRefVDI(vdi.uuid)
382 def getRecordVDI(self, uuid):
383 try:
384 ref = self._getRefVDI(uuid)
385 return self.session.xenapi.VDI.get_record(ref)
386 except XenAPI.Failure:
387 return None
389 def singleSnapshotVDI(self, vdi):
390 return self.session.xenapi.VDI.snapshot(vdi.getRef(),
391 {"type": "internal"})
393 def forgetVDI(self, srUuid, vdiUuid):
394 """Forget the VDI, but handle the case where the VDI has already been
395 forgotten (i.e. ignore errors)"""
396 try:
397 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
398 self.session.xenapi.VDI.forget(vdiRef)
399 except XenAPI.Failure:
400 pass
402 def getConfigVDI(self, vdi, key):
403 kind = vdi.CONFIG_TYPE[key]
404 if kind == self.CONFIG_SM:
405 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
406 elif kind == self.CONFIG_OTHER:
407 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
408 elif kind == self.CONFIG_ON_BOOT:
409 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
410 elif kind == self.CONFIG_ALLOW_CACHING:
411 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
412 else:
413 assert(False)
414 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
415 return cfg
417 def removeFromConfigVDI(self, vdi, key):
418 kind = vdi.CONFIG_TYPE[key]
419 if kind == self.CONFIG_SM:
420 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
421 elif kind == self.CONFIG_OTHER:
422 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
423 else:
424 assert(False)
426 def addToConfigVDI(self, vdi, key, val):
427 kind = vdi.CONFIG_TYPE[key]
428 if kind == self.CONFIG_SM:
429 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
430 elif kind == self.CONFIG_OTHER:
431 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
432 else:
433 assert(False)
435 def isSnapshot(self, vdi):
436 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())
438 def markCacheSRsDirty(self):
439 sr_refs = self.session.xenapi.SR.get_all_records_where( \
440 'field "local_cache_enabled" = "true"')
441 for sr_ref in sr_refs:
442 Util.log("Marking SR %s dirty" % sr_ref)
443 util.set_dirty(self.session, sr_ref)
445 def srUpdate(self):
446 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
447 abortFlag = IPCFlag(self.srRecord["uuid"])
448 task = self.session.xenapi.Async.SR.update(self._srRef)
449 cancelTask = True
450 try:
451 for i in range(60):
452 status = self.session.xenapi.task.get_status(task)
453 if not status == "pending":
454 Util.log("SR.update_asynch status changed to [%s]" % status)
455 cancelTask = False
456 return
457 if abortFlag.test(FLAG_TYPE_ABORT):
458 Util.log("Abort signalled during srUpdate, cancelling task...")
459 try:
460 self.session.xenapi.task.cancel(task)
461 cancelTask = False
462 Util.log("Task cancelled")
463 except:
464 pass
465 return
466 time.sleep(1)
467 finally:
468 if cancelTask:
469 self.session.xenapi.task.cancel(task)
470 self.session.xenapi.task.destroy(task)
471 Util.log("Asynch srUpdate still running, but timeout exceeded.")
473 def update_task(self):
474 self.session.xenapi.task.set_other_config(
475 self.task,
476 {
477 "applies_to": self._srRef
478 })
479 total = self.task_progress['coalescable'] + self.task_progress['done']
480 if (total > 0):
481 self.session.xenapi.task.set_progress(
482 self.task, float(self.task_progress['done']) / total)
484 def create_task(self, label, description):
485 self.task = self.session.xenapi.task.create(label, description)
486 self.update_task()
488 def update_task_progress(self, key, value):
489 self.task_progress[key] = value
490 if self.task:
491 self.update_task()
493 def set_task_status(self, status):
494 if self.task:
495 self.session.xenapi.task.set_status(self.task, status)
498################################################################################
499#
500# VDI
501#
502class VDI(object):
503 """Object representing a VDI of a COW-based SR"""
505 POLL_INTERVAL = 1
506 POLL_TIMEOUT = 30
507 DEVICE_MAJOR = 202
509 # config keys & values
510 DB_VDI_PARENT = "vhd-parent"
511 DB_VDI_TYPE = "vdi_type"
512 DB_VDI_BLOCKS = "vhd-blocks"
513 DB_VDI_PAUSED = "paused"
514 DB_VDI_RELINKING = "relinking"
515 DB_VDI_ACTIVATING = "activating"
516 DB_GC = "gc"
517 DB_COALESCE = "coalesce"
518 DB_LEAFCLSC = "leaf-coalesce" # config key
519 DB_GC_NO_SPACE = "gc_no_space"
520 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce
521 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce
522 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means
523 # no space to snap-coalesce or unable to keep
524 # up with VDI. This is not used by the SM, it
525 # might be used by external components.
526 DB_ONBOOT = "on-boot"
527 ONBOOT_RESET = "reset"
528 DB_ALLOW_CACHING = "allow_caching"
530 CONFIG_TYPE = {
531 DB_VDI_PARENT: XAPI.CONFIG_SM,
532 DB_VDI_TYPE: XAPI.CONFIG_SM,
533 DB_VDI_BLOCKS: XAPI.CONFIG_SM,
534 DB_VDI_PAUSED: XAPI.CONFIG_SM,
535 DB_VDI_RELINKING: XAPI.CONFIG_SM,
536 DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
537 DB_GC: XAPI.CONFIG_OTHER,
538 DB_COALESCE: XAPI.CONFIG_OTHER,
539 DB_LEAFCLSC: XAPI.CONFIG_OTHER,
540 DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
541 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
542 DB_GC_NO_SPACE: XAPI.CONFIG_SM
543 }
545 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes
546 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds
547 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating
548 # feasibility of leaf coalesce
550 JRN_RELINK = "relink" # journal entry type for relinking children
551 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced
552 JRN_LEAF = "leaf" # used in coalesce-leaf
554 STR_TREE_INDENT = 4
556 def __init__(self, sr, uuid, vdi_type):
557 self.sr = sr
558 self.scanError = True
559 self.uuid = uuid
560 self.vdi_type = vdi_type
561 self.fileName = ""
562 self.parentUuid = ""
563 self.sizeVirt = -1
564 self._sizePhys = -1
565 self._sizeAllocated = -1
566 self._hidden = False
567 self.parent = None
568 self.children = []
569 self._vdiRef = None
570 self.cowutil = getCowUtil(vdi_type)
571 self._clearRef()
573 @staticmethod
574 def extractUuid(path):
575 raise NotImplementedError("Implement in sub class")
577 def load(self, info=None) -> None:
578 """Load VDI info"""
579 pass
581 def getDriverName(self) -> str:
582 return self.vdi_type
584 def getRef(self):
585 if self._vdiRef is None:
586 self._vdiRef = self.sr.xapi.getRefVDI(self)
587 return self._vdiRef
589 def getConfig(self, key, default=None):
590 config = self.sr.xapi.getConfigVDI(self, key)
591 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:  # 591 ↛ 592: the condition on line 591 was never true
592 val = config
593 else:
594 val = config.get(key)
595 if val:
596 return val
597 return default
599 def setConfig(self, key, val):
600 self.sr.xapi.removeFromConfigVDI(self, key)
601 self.sr.xapi.addToConfigVDI(self, key, val)
602 Util.log("Set %s = %s for %s" % (key, val, self))
604 def delConfig(self, key):
605 self.sr.xapi.removeFromConfigVDI(self, key)
606 Util.log("Removed %s from %s" % (key, self))
608 def ensureUnpaused(self):
609 if self.getConfig(self.DB_VDI_PAUSED) == "true":
610 Util.log("Unpausing VDI %s" % self)
611 self.unpause()
613 def pause(self, failfast=False) -> None:
614 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
615 self.uuid, failfast):
616 raise util.SMException("Failed to pause VDI %s" % self)
618 def _report_tapdisk_unpause_error(self):
619 try:
620 xapi = self.sr.xapi.session.xenapi
621 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
622 msg_name = "failed to unpause tapdisk"
623 msg_body = "Failed to unpause tapdisk for VDI %s, " \
624 "VMs using this tapdisk have lost access " \
625 "to the corresponding disk(s)" % self.uuid
626 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
627 except Exception as e:
628 util.SMlog("failed to generate message: %s" % e)
630 def unpause(self):
631 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
632 self.uuid):
633 self._report_tapdisk_unpause_error()
634 raise util.SMException("Failed to unpause VDI %s" % self)
636 def refresh(self, ignoreNonexistent=True):
637 """Pause-unpause in one step"""
638 self.sr.lock()
639 try:
640 try:
641 if not blktap2.VDI.tap_refresh(self.sr.xapi.session,  # 641 ↛ 643: the condition on line 641 was never true
642 self.sr.uuid, self.uuid):
643 self._report_tapdisk_unpause_error()
644 raise util.SMException("Failed to refresh %s" % self)
645 except XenAPI.Failure as e:
646 if util.isInvalidVDI(e) and ignoreNonexistent:
647 Util.log("VDI %s not found, ignoring" % self)
648 return
649 raise
650 finally:
651 self.sr.unlock()
653 def isSnapshot(self):
654 return self.sr.xapi.isSnapshot(self)
656 def isAttachedRW(self):
657 return util.is_attached_rw(
658 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
660 def getVDIBlocks(self):
661 val = self.updateBlockInfo()
662 bitmap = zlib.decompress(base64.b64decode(val))
663 return bitmap
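# (Editorial note, not part of the original file.) This is the counterpart of
# updateBlockInfo(): the stored "vhd-blocks" value is base64-decoded and
# zlib-decompressed here, so the bytes returned by _queryCowBlocks() are
# assumed to already be zlib-compressed by the cowutil layer.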
665 def isCoalesceable(self):
666 """A VDI is coalesceable if it has no siblings and is not a leaf"""
667 return not self.scanError and \
668 self.parent and \
669 len(self.parent.children) == 1 and \
670 self.isHidden() and \
671 len(self.children) > 0
673 def isLeafCoalesceable(self):
674 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
675 return not self.scanError and \
676 self.parent and \
677 len(self.parent.children) == 1 and \
678 not self.isHidden() and \
679 len(self.children) == 0
681 def canLiveCoalesce(self, speed):
682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
683 isLeafCoalesceable() already"""
684 feasibleSize = False
685 allowedDownTime = \
686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
687 allocated_size = self.getAllocatedSize()
688 if speed:
689 feasibleSize = \
690 allocated_size // speed < allowedDownTime
691 else:
692 feasibleSize = \
693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
695 return (feasibleSize or
696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
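# (Editorial example with illustrative numbers, not part of the original file.)
# The allowed downtime is TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT
# = 0.5 * 10 = 5 seconds. With a measured coalesce speed of e.g. 100 MiB/s,
# a leaf with up to ~500 MiB of allocated data passes the check; with no speed
# history, only leaves under LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) qualify,
# unless LEAFCLSC_FORCE is set.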
698 def getAllPrunable(self):
699 if len(self.children) == 0: # base case
700 # it is possible to have a hidden leaf that was recently coalesced
701 # onto its parent, its children already relinked but not yet
702 # reloaded - in which case it may not be garbage collected yet:
703 # some tapdisks could still be using the file.
704 if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
705 return []
706 if not self.scanError and self.isHidden():
707 return [self]
708 return []
710 thisPrunable = True
711 vdiList = []
712 for child in self.children:
713 childList = child.getAllPrunable()
714 vdiList.extend(childList)
715 if child not in childList:
716 thisPrunable = False
718 # We can destroy the current VDI if all children are hidden BUT the
719 # current VDI must be hidden too to do that!
720 # Example in this case (after a failed live leaf coalesce):
721 #
722 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
723 # SMGC: [32436] b5458d61(1.000G/4.127M)
724 # SMGC: [32436] *OLD_b545(1.000G/4.129M)
725 #
726 # OLD_b545 is hidden and must be removed, but b5458d61 must not be.
727 # Normally we are not in this function when the delete action is
728 # executed but in `_liveLeafCoalesce`.
730 if not self.scanError and self.isHidden() and thisPrunable:
731 vdiList.append(self)
732 return vdiList
734 def getSizePhys(self) -> int:
735 return self._sizePhys
737 def getAllocatedSize(self) -> int:
738 return self._sizeAllocated
740 def getTreeRoot(self):
741 "Get the root of the tree that self belongs to"
742 root = self
743 while root.parent:
744 root = root.parent
745 return root
747 def getTreeHeight(self):
748 "Get the height of the subtree rooted at self"
749 if len(self.children) == 0:
750 return 1
752 maxChildHeight = 0
753 for child in self.children:
754 childHeight = child.getTreeHeight()
755 if childHeight > maxChildHeight:
756 maxChildHeight = childHeight
758 return maxChildHeight + 1
760 def getAllLeaves(self) -> List["VDI"]:
761 "Get all leaf nodes in the subtree rooted at self"
762 if len(self.children) == 0:
763 return [self]
765 leaves = []
766 for child in self.children:
767 leaves.extend(child.getAllLeaves())
768 return leaves
770 def updateBlockInfo(self) -> Optional[str]:
771 val = base64.b64encode(self._queryCowBlocks()).decode()
772 try:
773 self.setConfig(VDI.DB_VDI_BLOCKS, val)
774 except Exception:
775 if self.vdi_type != VdiType.QCOW2:
776 raise
777 # Sometimes with QCOW2 the allocation table is too big to be stored in XAPI; in this case we do not store it
778 # and we write `skipped` instead so that hasWork is happy (and the GC doesn't loop indefinitely).
779 self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")
781 return val
783 def rename(self, uuid) -> None:
784 "Rename the VDI file"
785 assert(not self.sr.vdis.get(uuid))
786 self._clearRef()
787 oldUuid = self.uuid
788 self.uuid = uuid
789 self.children = []
790 # updating the children themselves is the responsibility of the caller
791 del self.sr.vdis[oldUuid]
792 self.sr.vdis[self.uuid] = self
794 def delete(self) -> None:
795 "Physically delete the VDI"
796 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
797 lock.Lock.cleanupAll(self.uuid)
798 self._clear()
800 def getParent(self) -> str:
801 return self.cowutil.getParent(self.path, lambda x: x.strip())  # 801 ↛ exit: the lambda on line 801 was never run
803 def repair(self, parent) -> None:
804 self.cowutil.repair(parent)
806 @override
807 def __str__(self) -> str:
808 strHidden = ""
809 if self.isHidden():  # 809 ↛ 810: the condition on line 809 was never true
810 strHidden = "*"
811 strSizeVirt = "?"
812 if self.sizeVirt > 0:  # 812 ↛ 813: the condition on line 812 was never true
813 strSizeVirt = Util.num2str(self.sizeVirt)
814 strSizePhys = "?"
815 if self._sizePhys > 0:  # 815 ↛ 816: the condition on line 815 was never true
816 strSizePhys = "/%s" % Util.num2str(self._sizePhys)
817 strSizeAllocated = "?"
818 if self._sizeAllocated >= 0:
819 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
820 strType = "[{}]".format(self.vdi_type)
822 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
823 strSizePhys, strSizeAllocated, strType)
825 def validate(self, fast=False) -> None:
826 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:  # 826 ↛ 827: the condition on line 826 was never true
827 raise util.SMException("COW image %s corrupted" % self)
829 def _clear(self):
830 self.uuid = ""
831 self.path = ""
832 self.parentUuid = ""
833 self.parent = None
834 self._clearRef()
836 def _clearRef(self):
837 self._vdiRef = None
839 def _call_plug_cancel(self, hostRef):
840 args = {"path": self.path, "vdi_type": self.vdi_type}
841 self.sr.xapi.session.xenapi.host.call_plugin( \
842 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
844 def _call_plugin_coalesce(self, hostRef):
845 args = {"path": self.path, "vdi_type": self.vdi_type}
846 util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
847 ret = self.sr.xapi.session.xenapi.host.call_plugin( \
848 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
849 util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
851 def _doCoalesceOnHost(self, hostRef):
852 self.validate()
853 self.parent.validate(True)
854 self.parent._increaseSizeVirt(self.sizeVirt)
855 self.sr._updateSlavesOnResize(self.parent)
856 #TODO: We might need to make the LV RW on the slave directly for coalesce?
857 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them
859 def abortTest():
860 file = self.sr._gc_running_file(self)
861 try:
862 with open(file, "r") as f:
863 if not f.read():
864 util.SMlog("DAMS: abortTest: Cancelling coalesce")
865 self._call_plug_cancel(hostRef)
866 return True
867 except OSError as e:
868 if e.errno == errno.ENOENT:
869 util.SMlog("File {} does not exist".format(file))
870 else:
871 util.SMlog("IOError: {}".format(e))
872 return True
873 except Exception as e2:
874 util.SMlog(f"DAMS: Error in AbortTest for _doCoalesceOnHost: {e2}")
875 return True
876 return False
878 #TODO: Add exception handling here like when calling _doCoalesceCowImage in a runAbortable situation
879 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
880 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
882 self.parent.validate(True)
883 #self._verifyContents(0)
884 self.parent.updateBlockInfo()
886 def _isOpenOnHosts(self) -> Optional[str]:
887 for pbdRecord in self.sr.xapi.getAttachedPBDs():
888 hostRef = pbdRecord["host"]
889 args = {"path": self.path}
890 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
891 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
892 if is_openers:
893 return hostRef
894 return None
896 def _doCoalesce(self) -> None:
897 """Coalesce self onto parent. Only perform the actual coalescing of
898 an image, but not the subsequent relinking. We'll do that as the next step,
899 after reloading the entire SR in case things have changed while we
900 were coalescing"""
901 self.validate()
902 self.parent.validate(True)
903 self.parent._increaseSizeVirt(self.sizeVirt)
904 self.sr._updateSlavesOnResize(self.parent)
905 self._coalesceCowImage(0)
906 self.parent.validate(True)
907 #self._verifyContents(0)
908 self.parent.updateBlockInfo()
910 def _verifyContents(self, timeOut):
911 Util.log(" Coalesce verification on %s" % self)
912 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
913 Util.runAbortable(lambda: self._runTapdiskDiff(), True,
914 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
915 Util.log(" Coalesce verification succeeded")
917 def _runTapdiskDiff(self):
918 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
919 (self.getDriverName(), self.path, \
920 self.parent.getDriverName(), self.parent.path)
921 Util.doexec(cmd, 0)
922 return True
924 @staticmethod
925 def _reportCoalesceError(vdi, ce):
926 """Reports a coalesce error to XenCenter.
928 vdi: the VDI object on which the coalesce error occurred
929 ce: the CommandException that was raised"""
931 msg_name = os.strerror(ce.code)
932 if ce.code == errno.ENOSPC:
933 # TODO We could add more information here, e.g. exactly how much
934 # space is required for the particular coalesce, as well as actions
935 # to be taken by the user and consequences of not taking these
936 # actions.
937 msg_body = 'Ran out of space while coalescing.'
938 elif ce.code == errno.EIO:
939 msg_body = 'I/O error while coalescing.'
940 else:
941 msg_body = ''
942 util.SMlog('Coalesce failed on SR %s: %s (%s)'
943 % (vdi.sr.uuid, msg_name, msg_body))
945 # Create a XenCenter message, but don't spam.
946 xapi = vdi.sr.xapi.session.xenapi
947 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
948 oth_cfg = xapi.SR.get_other_config(sr_ref)
949 if COALESCE_ERR_RATE_TAG in oth_cfg:
950 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
951 else:
952 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
954 xcmsg = False
955 if coalesce_err_rate == 0:
956 xcmsg = True
957 elif coalesce_err_rate > 0:
958 now = datetime.datetime.now()
959 sm_cfg = xapi.SR.get_sm_config(sr_ref)
960 if COALESCE_LAST_ERR_TAG in sm_cfg:
961 # seconds per message (minimum distance in time between two
962 # messages in seconds)
963 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
964 last = datetime.datetime.fromtimestamp(
965 float(sm_cfg[COALESCE_LAST_ERR_TAG]))
966 if now - last >= spm:
967 xapi.SR.remove_from_sm_config(sr_ref,
968 COALESCE_LAST_ERR_TAG)
969 xcmsg = True
970 else:
971 xcmsg = True
972 if xcmsg:
973 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
974 str(now.strftime('%s')))
975 if xcmsg:
976 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
978 def coalesce(self) -> int:
979 return self.cowutil.coalesce(self.path)
981 @staticmethod
982 def _doCoalesceCowImage(vdi: "VDI"):
983 try:
984 startTime = time.time()
985 allocated_size = vdi.getAllocatedSize()
986 coalesced_size = vdi.coalesce()
987 endTime = time.time()
988 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
989 except util.CommandException as ce:
990 # We use try/except for the following piece of code because it runs
991 # in a separate process context and errors will not be caught and
992 # reported by anyone.
993 try:
994 # Report coalesce errors back to user via XC
995 VDI._reportCoalesceError(vdi, ce)
996 except Exception as e:
997 util.SMlog('failed to create XenCenter message: %s' % e)
998 raise ce
999 except:
1000 raise
1002 def _vdi_is_raw(self, vdi_path):
1003 """
1004 Given path to vdi determine if it is raw
1005 """
1006 uuid = self.extractUuid(vdi_path)
1007 return self.sr.vdis[uuid].vdi_type == VdiType.RAW
1009 def _coalesceCowImage(self, timeOut):
1010 Util.log(" Running COW coalesce on %s" % self)
1011 def abortTest():
1012 if self.cowutil.isCoalesceableOnRemote():
1013 file = self.sr._gc_running_file(self)
1014 try:
1015 with open(file, "r") as f:
1016 if not f.read():
1017 return True
1018 except OSError as e:
1019 if e.errno == errno.ENOENT:
1020 util.SMlog("File {} does not exist".format(file))
1021 else:
1022 util.SMlog("IOError: {}".format(e))
1023 return True
1024 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1026 try:
1027 util.fistpoint.activate_custom_fn(
1028 "cleanup_coalesceVHD_inject_failure",
1029 util.inject_failure)
1030 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
1031 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
1032 except:
1033 # Exception at this phase could indicate a failure in COW coalesce
1034 # or a kill of COW coalesce by runAbortable due to timeOut
1035 # Try a repair and reraise the exception
1036 parent = ""
1037 try:
1038 parent = self.getParent()
1039 if not self._vdi_is_raw(parent):
1040 # Repair error is logged and ignored. Error reraised later
1041 util.SMlog('Coalesce failed on %s, attempting repair on ' \
1042 'parent %s' % (self.uuid, parent))
1043 self.repair(parent)
1044 except Exception as e:
1045 util.SMlog('(error ignored) Failed to repair parent %s ' \
1046 'after failed coalesce on %s, err: %s' %
1047 (parent, self.path, e))
1048 raise
1050 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
1052 def _relinkSkip(self) -> None:
1053 """Relink children of this VDI to point to the parent of this VDI"""
1054 abortFlag = IPCFlag(self.sr.uuid)
1055 for child in self.children:
1056 if abortFlag.test(FLAG_TYPE_ABORT):  # 1056 ↛ 1057: the condition on line 1056 was never true
1057 raise AbortException("Aborting due to signal")
1058 Util.log(" Relinking %s from %s to %s" % \
1059 (child, self, self.parent))
1060 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
1061 child._setParent(self.parent)
1062 self.children = []
1064 def _reloadChildren(self, vdiSkip):
1065 """Pause & unpause all VDIs in the subtree to cause blktap to reload
1066 the COW image metadata for this file in any online VDI"""
1067 abortFlag = IPCFlag(self.sr.uuid)
1068 for child in self.children:
1069 if child == vdiSkip:
1070 continue
1071 if abortFlag.test(FLAG_TYPE_ABORT):  # 1071 ↛ 1072: the condition on line 1071 was never true
1072 raise AbortException("Aborting due to signal")
1073 Util.log(" Reloading VDI %s" % child)
1074 child._reload()
1076 def _reload(self):
1077 """Pause & unpause to cause blktap to reload the image metadata"""
1078 for child in self.children:  # 1078 ↛ 1079: the loop on line 1078 never started
1079 child._reload()
1081 # only leaves can be attached
1082 if len(self.children) == 0:  # 1082 ↛ exit: the condition on line 1082 was never false
1083 try:
1084 self.delConfig(VDI.DB_VDI_RELINKING)
1085 except XenAPI.Failure as e:
1086 if not util.isInvalidVDI(e):
1087 raise
1088 self.refresh()
1090 def _tagChildrenForRelink(self):
1091 if len(self.children) == 0:
1092 retries = 0
1093 try:
1094 while retries < 15:
1095 retries += 1
1096 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
1097 Util.log("VDI %s is activating, wait to relink" %
1098 self.uuid)
1099 else:
1100 self.setConfig(VDI.DB_VDI_RELINKING, "True")
1102 if self.getConfig(VDI.DB_VDI_ACTIVATING):
1103 self.delConfig(VDI.DB_VDI_RELINKING)
1104 Util.log("VDI %s started activating while tagging" %
1105 self.uuid)
1106 else:
1107 return
1108 time.sleep(2)
1110 raise util.SMException("Failed to tag vdi %s for relink" % self)
1111 except XenAPI.Failure as e:
1112 if not util.isInvalidVDI(e):
1113 raise
1115 for child in self.children:
1116 child._tagChildrenForRelink()
1118 def _loadInfoParent(self):
1119 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
1120 if ret:
1121 self.parentUuid = ret
1123 def _setParent(self, parent) -> None:
1124 self.cowutil.setParent(self.path, parent.path, False)
1125 self.parent = parent
1126 self.parentUuid = parent.uuid
1127 parent.children.append(self)
1128 try:
1129 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1130 Util.log("Updated the vhd-parent field for child %s with %s" % \
1131 (self.uuid, self.parentUuid))
1132 except:
1133 Util.log("Failed to update %s with vhd-parent field %s" % \
1134 (self.uuid, self.parentUuid))
1136 def isHidden(self) -> bool:
1137 if self._hidden is None:  # 1137 ↛ 1138: the condition on line 1137 was never true
1138 self._loadInfoHidden()
1139 return self._hidden
1141 def _loadInfoHidden(self) -> None:
1142 hidden = self.cowutil.getHidden(self.path)
1143 self._hidden = (hidden != 0)
1145 def _setHidden(self, hidden=True) -> None:
1146 self._hidden = None
1147 self.cowutil.setHidden(self.path, hidden)
1148 self._hidden = hidden
1150 def _increaseSizeVirt(self, size, atomic=True) -> None:
1151 """ensure the virtual size of 'self' is at least 'size'. Note that
1152 resizing a COW image must always be done offline and atomically: the file must
1153 not be open by anyone and no concurrent operations may take place.
1154 Thus we use the Agent API call for performing paused atomic
1155 operations. If the caller is already in the atomic context, it must
1156 call with atomic = False"""
1157 if self.sizeVirt >= size:  # 1157 ↛ 1159: the condition on line 1157 was never false
1158 return
1159 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1160 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1162 msize = self.cowutil.getMaxResizeSize(self.path)
1163 if (size <= msize):
1164 self.cowutil.setSizeVirtFast(self.path, size)
1165 else:
1166 if atomic:
1167 vdiList = self._getAllSubtree()
1168 self.sr.lock()
1169 try:
1170 self.sr.pauseVDIs(vdiList)
1171 try:
1172 self._setSizeVirt(size)
1173 finally:
1174 self.sr.unpauseVDIs(vdiList)
1175 finally:
1176 self.sr.unlock()
1177 else:
1178 self._setSizeVirt(size)
1180 self.sizeVirt = self.cowutil.getSizeVirt(self.path)
1182 def _setSizeVirt(self, size) -> None:
1183 """WARNING: do not call this method directly unless all VDIs in the
1184 subtree are guaranteed to be unplugged (and remain so for the duration
1185 of the operation): this operation is only safe for offline COW images"""
1186 jFile = os.path.join(self.sr.path, self.uuid)
1187 self.cowutil.setSizeVirt(self.path, size, jFile)
1189 def _queryCowBlocks(self) -> bytes:
1190 return self.cowutil.getBlockBitmap(self.path)
1192 def _getCoalescedSizeData(self):
1193 """Get the data size of the resulting image if we coalesce self onto
1194 parent. We calculate the actual size by using the image block allocation
1195 information (as opposed to just adding up the two image sizes to get an
1196 upper bound)"""
1197 # make sure we don't use stale BAT info from vdi_rec since the child
1198 # was writable all this time
1199 self.delConfig(VDI.DB_VDI_BLOCKS)
1200 blocksChild = self.getVDIBlocks()
1201 blocksParent = self.parent.getVDIBlocks()
1202 numBlocks = Util.countBits(blocksChild, blocksParent)
1203 Util.log("Num combined blocks = %d" % numBlocks)
1204 sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
1205 assert(sizeData <= self.sizeVirt)
1206 return sizeData
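# (Editorial example with illustrative numbers, not part of the original file.)
# If the ORed bitmaps contain 1000 allocated blocks and getBlockSize() reports
# the typical VHD block size of 2 MiB, sizeData comes to ~2000 MiB; the image
# overhead on top of this is added in _calcExtraSpaceForCoalescing().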
1208 def _calcExtraSpaceForCoalescing(self) -> int:
1209 sizeData = self._getCoalescedSizeData()
1210 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
1211 self.cowutil.calcOverheadEmpty(self.sizeVirt)
1212 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1213 return sizeCoalesced - self.parent.getSizePhys()
1215 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1216 """How much extra space in the SR will be required to
1217 [live-]leaf-coalesce this VDI"""
1218 # the space requirements are the same as for inline coalesce
1219 return self._calcExtraSpaceForCoalescing()
1221 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1222 """How much extra space in the SR will be required to
1223 snapshot-coalesce this VDI"""
1224 return self._calcExtraSpaceForCoalescing() + \
1225 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
1227 def _getAllSubtree(self):
1228 """Get self and all VDIs in the subtree of self as a flat list"""
1229 vdiList = [self]
1230 for child in self.children:
1231 vdiList.extend(child._getAllSubtree())
1232 return vdiList
1235class FileVDI(VDI):
1236 """Object representing a VDI in a file-based SR (EXT or NFS)"""
1238 @override
1239 @staticmethod
1240 def extractUuid(path):
1241 fileName = os.path.basename(path)
1242 return os.path.splitext(fileName)[0]
1244 def __init__(self, sr, uuid, vdi_type):
1245 VDI.__init__(self, sr, uuid, vdi_type)
1246 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1248 @override
1249 def load(self, info=None) -> None:
1250 if not info:
1251 if not util.pathexists(self.path):
1252 raise util.SMException("%s not found" % self.path)
1253 try:
1254 info = self.cowutil.getInfo(self.path, self.extractUuid)
1255 except util.SMException:
1256 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
1257 return
1258 self.parent = None
1259 self.children = []
1260 self.parentUuid = info.parentUuid
1261 self.sizeVirt = info.sizeVirt
1262 self._sizePhys = info.sizePhys
1263 self._sizeAllocated = info.sizeAllocated
1264 self._hidden = info.hidden
1265 self.scanError = False
1266 self.path = os.path.join(self.sr.path, "%s%s" % \
1267 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))
1269 @override
1270 def rename(self, uuid) -> None:
1271 oldPath = self.path
1272 VDI.rename(self, uuid)
1273 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1274 self.path = os.path.join(self.sr.path, self.fileName)
1275 assert(not util.pathexists(self.path))
1276 Util.log("Renaming %s -> %s" % (oldPath, self.path))
1277 os.rename(oldPath, self.path)
1279 @override
1280 def delete(self) -> None:
1281 if len(self.children) > 0:  # 1281 ↛ 1282: the condition on line 1281 was never true
1282 raise util.SMException("VDI %s has children, can't delete" % \
1283 self.uuid)
1284 try:
1285 self.sr.lock()
1286 try:
1287 os.unlink(self.path)
1288 self.sr.forgetVDI(self.uuid)
1289 finally:
1290 self.sr.unlock()
1291 except OSError:
1292 raise util.SMException("os.unlink(%s) failed" % self.path)
1293 VDI.delete(self)
1295 @override
1296 def getAllocatedSize(self) -> int:
1297 if self._sizeAllocated == -1:  # 1297 ↛ 1298: the condition on line 1297 was never true
1298 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1299 return self._sizeAllocated
1302class LVMVDI(VDI):
1303 """Object representing a VDI in an LVM SR"""
1305 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent
1307 @override
1308 def load(self, info=None) -> None:
1309 # `info` is always set; the `None` default value is only here to match the parent method's signature.
1310 assert info, "No info given to LVMVDI.load"
1311 self.parent = None
1312 self.children = []
1313 self._sizePhys = -1
1314 self._sizeAllocated = -1
1315 self.scanError = info.scanError
1316 self.sizeLV = info.sizeLV
1317 self.sizeVirt = info.sizeVirt
1318 self.fileName = info.lvName
1319 self.lvActive = info.lvActive
1320 self.lvOpen = info.lvOpen
1321 self.lvReadonly = info.lvReadonly
1322 self._hidden = info.hidden
1323 self.parentUuid = info.parentUuid
1324 self.path = os.path.join(self.sr.path, self.fileName)
1325 self.lvmcowutil = LvmCowUtil(self.cowutil)
1327 @override
1328 @staticmethod
1329 def extractUuid(path):
1330 return LvmCowUtil.extractUuid(path)
1332 def inflate(self, size):
1333 """inflate the LV containing the COW image to 'size'"""
1334 if not VdiType.isCowImage(self.vdi_type):
1335 return
1336 self._activate()
1337 self.sr.lock()
1338 try:
1339 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
1340 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
1341 finally:
1342 self.sr.unlock()
1343 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1344 self._sizePhys = -1
1345 self._sizeAllocated = -1
1347 def deflate(self):
1348 """deflate the LV containing the image to minimum"""
1349 if not VdiType.isCowImage(self.vdi_type):
1350 return
1351 self._activate()
1352 self.sr.lock()
1353 try:
1354 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
1355 finally:
1356 self.sr.unlock()
1357 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1358 self._sizePhys = -1
1359 self._sizeAllocated = -1
1361 def inflateFully(self):
1362 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))
1364 def inflateParentForCoalesce(self):
1365 """Inflate the parent only as much as needed for the purposes of
1366 coalescing"""
1367 if not VdiType.isCowImage(self.parent.vdi_type):
1368 return
1369 inc = self._calcExtraSpaceForCoalescing()
1370 if inc > 0:
1371 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
1372 self.parent.inflate(self.parent.sizeLV + inc)
1374 @override
1375 def updateBlockInfo(self) -> Optional[str]:
1376 if VdiType.isCowImage(self.vdi_type):
1377 return VDI.updateBlockInfo(self)
1378 return None
1380 @override
1381 def rename(self, uuid) -> None:
1382 oldUuid = self.uuid
1383 oldLVName = self.fileName
1384 VDI.rename(self, uuid)
1385 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
1386 self.path = os.path.join(self.sr.path, self.fileName)
1387 assert(not self.sr.lvmCache.checkLV(self.fileName))
1389 self.sr.lvmCache.rename(oldLVName, self.fileName)
1390 if self.sr.lvActivator.get(oldUuid, False):
1391 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)
1393 ns = NS_PREFIX_LVM + self.sr.uuid
1394 (cnt, bcnt) = RefCounter.check(oldUuid, ns)
1395 RefCounter.set(self.uuid, cnt, bcnt, ns)
1396 RefCounter.reset(oldUuid, ns)
1398 @override
1399 def delete(self) -> None:
1400 if len(self.children) > 0:
1401 raise util.SMException("VDI %s has children, can't delete" % \
1402 self.uuid)
1403 self.sr.lock()
1404 try:
1405 self.sr.lvmCache.remove(self.fileName)
1406 self.sr.forgetVDI(self.uuid)
1407 finally:
1408 self.sr.unlock()
1409 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
1410 VDI.delete(self)
1412 @override
1413 def getSizePhys(self) -> int:
1414 if self._sizePhys == -1:
1415 self._loadInfoSizePhys()
1416 return self._sizePhys
1418 def _loadInfoSizePhys(self):
1419 """Get the physical utilization of the COW image file. We do it individually
1420 (and not using the COW batch scanner) as an optimization: this info is
1421 relatively expensive and we need it only for VDIs involved in
1422 coalescing."""
1423 if not VdiType.isCowImage(self.vdi_type):
1424 return
1425 self._activate()
1426 self._sizePhys = self.cowutil.getSizePhys(self.path)
1427 if self._sizePhys <= 0:
1428 raise util.SMException("phys size of %s = %d" % \
1429 (self, self._sizePhys))
1431 @override
1432 def getAllocatedSize(self) -> int:
1433 if self._sizeAllocated == -1:
1434 self._loadInfoSizeAllocated()
1435 return self._sizeAllocated
1437 def _loadInfoSizeAllocated(self):
1438 """
1439 Get the allocated size of the COW volume.
1440 """
1441 if not VdiType.isCowImage(self.vdi_type):
1442 return
1443 self._activate()
1444 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1446 @override
1447 def _loadInfoHidden(self) -> None:
1448 if not VdiType.isCowImage(self.vdi_type):
1449 self._hidden = self.sr.lvmCache.getHidden(self.fileName)
1450 else:
1451 VDI._loadInfoHidden(self)
1453 @override
1454 def _setHidden(self, hidden=True) -> None:
1455 if not VdiType.isCowImage(self.vdi_type):
1456 self._hidden = None
1457 self.sr.lvmCache.setHidden(self.fileName, hidden)
1458 self._hidden = hidden
1459 else:
1460 VDI._setHidden(self, hidden)
1462 @override
1463 def __str__(self) -> str:
1464 strType = self.vdi_type
1465 if self.vdi_type == VdiType.RAW:
1466 strType = "RAW"
1467 strHidden = ""
1468 if self.isHidden():
1469 strHidden = "*"
1470 strSizePhys = ""
1471 if self._sizePhys > 0:
1472 strSizePhys = Util.num2str(self._sizePhys)
1473 strSizeAllocated = ""
1474 if self._sizeAllocated >= 0:
1475 strSizeAllocated = Util.num2str(self._sizeAllocated)
1476 strActive = "n"
1477 if self.lvActive:
1478 strActive = "a"
1479 if self.lvOpen:
1480 strActive += "o"
1481 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
1482 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
1483 Util.num2str(self.sizeLV), strActive)
1485 @override
1486 def validate(self, fast=False) -> None:
1487 if VdiType.isCowImage(self.vdi_type):
1488 VDI.validate(self, fast)
1490 def _setChainRw(self) -> List[str]:
1491 """
1492 Make the read-only LV and its children writable.
1493 This is needed because the coalesce can be done by tapdisk directly,
1494 and tapdisk will need to write parent information into the children.
1495 The VDI being coalesced into its parent must also be writable for the libqcow coalesce step.
1496 Return the list of LVs that were previously read-only so they can be made read-only again after the coalesce.
1497 """
1498 was_ro = []
1499 if self.lvReadonly:
1500 self.sr.lvmCache.setReadonly(self.fileName, False)
1501 was_ro.append(self.fileName)
1503 for child in self.children:
1504 if child.lvReadonly:
1505 self.sr.lvmCache.setReadonly(child.fileName, False)
1506 was_ro.append(child.fileName)
1508 return was_ro
1510 def _setChainRo(self, was_ro: List[str]) -> None:
1511 """Set the list of LV in parameters to readonly"""
1512 for lvName in was_ro:
1513 self.sr.lvmCache.setReadonly(lvName, True)
1515 @override
1516 def _doCoalesce(self) -> None:
1517 """LVMVDI parents must first be activated, inflated, and made writable"""
1518 was_ro = []
1519 try:
1520 self._activateChain()
1521 self.sr.lvmCache.setReadonly(self.parent.fileName, False)
1522 self.parent.validate()
1523 self.inflateParentForCoalesce()
1524 was_ro = self._setChainRw()
1525 VDI._doCoalesce(self)
1526 finally:
1527 self.parent._loadInfoSizePhys()
1528 self.parent.deflate()
1529 self.sr.lvmCache.setReadonly(self.parent.fileName, True)
1530 self._setChainRo(was_ro)
1532 @override
1533 def _setParent(self, parent) -> None:
1534 self._activate()
1535 if self.lvReadonly:
1536 self.sr.lvmCache.setReadonly(self.fileName, False)
1538 try:
1539 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
1540 finally:
1541 if self.lvReadonly:
1542 self.sr.lvmCache.setReadonly(self.fileName, True)
1543 self._deactivate()
1544 self.parent = parent
1545 self.parentUuid = parent.uuid
1546 parent.children.append(self)
1547 try:
1548 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1549 Util.log("Updated the VDI-parent field for child %s with %s" % \
1550 (self.uuid, self.parentUuid))
1551 except:
1552 Util.log("Failed to update the VDI-parent with %s for child %s" % \
1553 (self.parentUuid, self.uuid))
1555 def _activate(self):
1556 self.sr.lvActivator.activate(self.uuid, self.fileName, False)
1558 def _activateChain(self):
1559 vdi = self
1560 while vdi:
1561 vdi._activate()
1562 vdi = vdi.parent
1564 def _deactivate(self):
1565 self.sr.lvActivator.deactivate(self.uuid, False)
1567 @override
1568 def _increaseSizeVirt(self, size, atomic=True) -> None:
1569 "ensure the virtual size of 'self' is at least 'size'"
1570 self._activate()
1571 if VdiType.isCowImage(self.vdi_type):
1572 VDI._increaseSizeVirt(self, size, atomic)
1573 return
1575 # raw VDI case
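# (Editorial note, not part of the original file.) Growing a raw LV means
# extending the volume and zeroing the newly added range. The JRN_ZERO journal
# entry below records the offset where zeroing starts, so an interrupted run
# can redo the zeroing from that offset instead of leaving stale data behind.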
1576 offset = self.sizeLV
1577 if self.sizeVirt < size:
1578 oldSize = self.sizeLV
1579 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
1580 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
1581 self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
1582 offset = oldSize
1583 unfinishedZero = False
1584 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
1585 if jval:
1586 unfinishedZero = True
1587 offset = int(jval)
1588 length = self.sizeLV - offset
1589 if not length:
1590 return
1592 if unfinishedZero:
1593 Util.log(" ==> Redoing unfinished zeroing out")
1594 else:
1595 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
1596 str(offset))
1597 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1598 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1599 func = lambda: util.zeroOut(self.path, offset, length)
1600 Util.runAbortable(func, True, self.sr.uuid, abortTest,
1601 VDI.POLL_INTERVAL, 0)
1602 self.sr.journaler.remove(self.JRN_ZERO, self.uuid)
1604 @override
1605 def _setSizeVirt(self, size) -> None:
1606 """WARNING: do not call this method directly unless all VDIs in the
1607 subtree are guaranteed to be unplugged (and remain so for the duration
1608 of the operation): this operation is only safe for offline COW images."""
1609 self._activate()
1610 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
1611 try:
1612 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
1613 finally:
1614 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)
1616 @override
1617 def _queryCowBlocks(self) -> bytes:
1618 self._activate()
1619 return VDI._queryCowBlocks(self)
1621 @override
1622 def _calcExtraSpaceForCoalescing(self) -> int:
1623 if not VdiType.isCowImage(self.parent.vdi_type):
1624 return 0 # raw parents are never deflated in the first place
1625 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
1626 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1627 return sizeCoalesced - self.parent.sizeLV
1629 @override
1630 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1631 """How much extra space in the SR will be required to
1632 [live-]leaf-coalesce this VDI"""
1633 # we can deflate the leaf to minimize the space requirements
1634 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
1635 return self._calcExtraSpaceForCoalescing() - deflateDiff
1637 @override
1638 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1639 return self._calcExtraSpaceForCoalescing() + \
1640 lvutil.calcSizeLV(self.getSizePhys())
1643class LinstorVDI(VDI):
1644 """Object representing a VDI in a LINSTOR SR"""
1646 VOLUME_LOCK_TIMEOUT = 30
1648 @override
1649 def load(self, info=None) -> None:
1650 self.parentUuid = info.parentUuid
1651 self.scanError = True
1652 self.parent = None
1653 self.children = []
1655 self.fileName = self.sr._linstor.get_volume_name(self.uuid)
1656 self.path = self.sr._linstor.build_device_path(self.fileName)
1657 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)
1659 if not info:
1660 try:
1661 info = self.linstorcowutil.get_info(self.uuid)
1662 except util.SMException:
1663 Util.log(
1664 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
1665 )
1666 return
1668 self.parentUuid = info.parentUuid
1669 self.sizeVirt = info.sizeVirt
1670 self._sizePhys = -1
1671 self._sizeAllocated = -1
1672 self.drbd_size = -1
1673 self._hidden = info.hidden
1674 self.scanError = False
1676 @override
1677 def getSizePhys(self, fetch=False) -> int:
1678 if self._sizePhys < 0 or fetch:
1679 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid)
1680 return self._sizePhys
1682 def getDrbdSize(self, fetch=False):
1683 if self.drbd_size < 0 or fetch:
1684 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid)
1685 return self.drbd_size
1687 @override
1688 def getAllocatedSize(self) -> int:
1689 if self._sizeAllocated == -1:
1690 if VdiType.isCowImage(self.vdi_type):
1691 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid)
1692 return self._sizeAllocated
1694 def inflate(self, size):
1695 if not VdiType.isCowImage(self.vdi_type):
1696 return
1697 self.sr.lock()
1698 try:
1699 # Ensure we use the real DRBD size and not the cached one.
1700 # Why? Because this attribute can change if the volume is resized by the user.
1701 self.drbd_size = self.getDrbdSize(fetch=True)
1702 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
1703 finally:
1704 self.sr.unlock()
1705 self.drbd_size = -1
1706 self._sizePhys = -1
1707 self._sizeAllocated = -1
1709 def deflate(self):
1710 if not VdiType.isCowImage(self.vdi_type):
1711 return
1712 self.sr.lock()
1713 try:
1714 # Ensure we use the real sizes and not the cached info.
1715 self.drbd_size = self.getDrbdSize(fetch=True)
1716 self._sizePhys = self.getSizePhys(fetch=True)
1717 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
1718 finally:
1719 self.sr.unlock()
1720 self.drbd_size = -1
1721 self._sizePhys = -1
1722 self._sizeAllocated = -1
1724 def inflateFully(self):
1725 if VdiType.isCowImage(self.vdi_type):
1726 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt))
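    # NOTE: inflate() grows the underlying DRBD volume to the requested size
    # (inflateFully() asks for enough space to hold the fully-allocated image),
    # and deflate() shrinks it back to the current physical usage; both take the
    # SR lock, re-read the real sizes first, and invalidate the cached values
    # afterwards.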
1728 @override
1729 def rename(self, uuid) -> None:
1730 Util.log('Renaming {} -> {} (path={})'.format(
1731 self.uuid, uuid, self.path
1732 ))
1733 self.sr._linstor.update_volume_uuid(self.uuid, uuid)
1734 VDI.rename(self, uuid)
1736 @override
1737 def delete(self) -> None:
1738 if len(self.children) > 0:
1739 raise util.SMException(
1740 'VDI {} has children, can\'t delete'.format(self.uuid)
1741 )
1742 self.sr.lock()
1743 try:
1744 self.sr._linstor.destroy_volume(self.uuid)
1745 self.sr.forgetVDI(self.uuid)
1746 finally:
1747 self.sr.unlock()
1748 VDI.delete(self)
1750 @override
1751 def validate(self, fast=False) -> None:
1752 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success:
1753 raise util.SMException('COW image {} corrupted'.format(self))
1755 @override
1756 def pause(self, failfast=False) -> None:
1757 self.sr._linstor.ensure_volume_is_not_locked(
1758 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1759 )
1760 return super(LinstorVDI, self).pause(failfast)
1762 @override
1763 def coalesce(self) -> int:
1764 # Note: We raise `SMException` here to skip the current coalesce in case of failure.
1765         # With any other exception type the subsequent coalesce calls would not be executed.
1766 return self.linstorcowutil.force_coalesce(self.path)
1768 @override
1769 def getParent(self) -> str:
1770 return self.linstorcowutil.get_parent(
1771 self.sr._linstor.get_volume_uuid_from_device_path(self.path)
1772 )
1774 @override
1775 def repair(self, parent_uuid) -> None:
1776 self.linstorcowutil.force_repair(
1777 self.sr._linstor.get_device_path(parent_uuid)
1778 )
1780 @override
1781 def _relinkSkip(self) -> None:
1782 abortFlag = IPCFlag(self.sr.uuid)
1783 for child in self.children:
1784 if abortFlag.test(FLAG_TYPE_ABORT):
1785 raise AbortException('Aborting due to signal')
1786 Util.log(
1787 ' Relinking {} from {} to {}'.format(
1788 child, self, self.parent
1789 )
1790 )
1792 session = child.sr.xapi.session
1793 sr_uuid = child.sr.uuid
1794 vdi_uuid = child.uuid
1795 try:
1796 self.sr._linstor.ensure_volume_is_not_locked(
1797 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1798 )
1799 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
1800 child._setParent(self.parent)
1801 finally:
1802 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
1803 self.children = []
1805 @override
1806 def _setParent(self, parent) -> None:
1807 self.sr._linstor.get_device_path(self.uuid)
1808 self.linstorcowutil.force_parent(self.path, parent.path)
1809 self.parent = parent
1810 self.parentUuid = parent.uuid
1811 parent.children.append(self)
1812 try:
1813 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1814 Util.log("Updated the vhd-parent field for child %s with %s" % \
1815 (self.uuid, self.parentUuid))
1816 except:
1817 Util.log("Failed to update %s with vhd-parent field %s" % \
1818 (self.uuid, self.parentUuid))
1820 @override
1821 def _doCoalesce(self) -> None:
1822 try:
1823 self._activateChain()
1824 self.parent.validate()
1825 self._inflateParentForCoalesce()
1826 VDI._doCoalesce(self)
1827 finally:
1828 self.parent.deflate()
1830 def _activateChain(self):
1831 vdi = self
1832 while vdi:
1833 try:
1834 p = self.sr._linstor.get_device_path(vdi.uuid)
1835 except Exception as e:
1836 # Use SMException to skip coalesce.
1837                 # Otherwise the whole GC run would be stopped...
1838 raise util.SMException(str(e))
1839 vdi = vdi.parent
1841 @override
1842 def _setHidden(self, hidden=True) -> None:
1843 HIDDEN_TAG = 'hidden'
1845 if not VdiType.isCowImage(self.vdi_type):
1846 self._hidden = None
1847 self.sr._linstor.update_volume_metadata(self.uuid, {
1848 HIDDEN_TAG: hidden
1849 })
1850 self._hidden = hidden
1851 else:
1852 VDI._setHidden(self, hidden)
1854 @override
1855 def _increaseSizeVirt(self, size, atomic=True):
1856 if self.vdi_type == VdiType.RAW:
1857 offset = self.drbd_size
1858 if self.sizeVirt < size:
1859 oldSize = self.drbd_size
1860 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
1861 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
1862 self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
1863 offset = oldSize
1864 unfinishedZero = False
1865 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
1866 if jval:
1867 unfinishedZero = True
1868 offset = int(jval)
1869 length = self.drbd_size - offset
1870 if not length:
1871 return
1873 if unfinishedZero:
1874 Util.log(" ==> Redoing unfinished zeroing out")
1875 else:
1876 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
1877 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1878 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1879 func = lambda: util.zeroOut(self.path, offset, length)
1880 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
1881 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
1882 return
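        # NOTE: the LinstorJournaler.ZERO entry above records the offset at which
        # zeroing of the newly added extent started, so an interrupted run can be
        # resumed ("Redoing unfinished zeroing out") instead of leaving stale data
        # readable in the grown RAW volume.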
1884 if self.sizeVirt >= size:
1885 return
1886 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1887 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1889 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
1890 if (size <= msize):
1891 self.linstorcowutil.set_size_virt_fast(self.path, size)
1892 else:
1893 if atomic:
1894 vdiList = self._getAllSubtree()
1895 self.sr.lock()
1896 try:
1897 self.sr.pauseVDIs(vdiList)
1898 try:
1899 self._setSizeVirt(size)
1900 finally:
1901 self.sr.unpauseVDIs(vdiList)
1902 finally:
1903 self.sr.unlock()
1904 else:
1905 self._setSizeVirt(size)
1907 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)
1909 @override
1910 def _setSizeVirt(self, size) -> None:
1911 jfile = self.uuid + '-jvhd'
1912 self.sr._linstor.create_volume(
1913 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
1914 )
1915 try:
1916 self.inflate(self.linstorcowutil.compute_volume_size(size))
1917 self.linstorcowutil.set_size_virt(self.path, size, jfile)
1918 finally:
1919 try:
1920 self.sr._linstor.destroy_volume(jfile)
1921 except Exception:
1922 # We can ignore it, in any case this volume is not persistent.
1923 pass
1925 @override
1926 def _queryCowBlocks(self) -> bytes:
1927 return self.linstorcowutil.get_block_bitmap(self.uuid)
1929 def _inflateParentForCoalesce(self):
1930 if not VdiType.isCowImage(self.parent.vdi_type):
1931 return
1932 inc = self._calcExtraSpaceForCoalescing()
1933 if inc > 0:
1934 self.parent.inflate(self.parent.getDrbdSize() + inc)
1936 @override
1937 def _calcExtraSpaceForCoalescing(self) -> int:
1938 if not VdiType.isCowImage(self.parent.vdi_type):
1939 return 0
1940 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
1941 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
1942 return size_coalesced - self.parent.getDrbdSize()
1944 @override
1945 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1946 assert self.getDrbdSize() > 0
1947 assert self.getSizePhys() > 0
1948 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1949 assert deflate_diff >= 0
1950 return self._calcExtraSpaceForCoalescing() - deflate_diff
1952 @override
1953 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1954 assert self.getSizePhys() > 0
1955 return self._calcExtraSpaceForCoalescing() + \
1956 LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1958################################################################################
1959#
1960# SR
1961#
1962class SR(object):
1963 class LogFilter:
1964 def __init__(self, sr):
1965 self.sr = sr
1966 self.stateLogged = False
1967 self.prevState = {}
1968 self.currState = {}
1970 def logState(self):
1971 changes = ""
1972 self.currState.clear()
1973 for vdi in self.sr.vdiTrees:
1974 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1975 if not self.prevState.get(vdi.uuid) or \
1976 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1977 changes += self.currState[vdi.uuid]
1979 for uuid in self.prevState:
1980 if not self.currState.get(uuid):
1981 changes += "Tree %s gone\n" % uuid
1983 result = "SR %s (%d VDIs in %d COW trees): " % \
1984 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1986 if len(changes) > 0:
1987 if self.stateLogged:
1988 result += "showing only COW trees that changed:"
1989 result += "\n%s" % changes
1990 else:
1991 result += "no changes"
1993 for line in result.split("\n"):
1994 Util.log("%s" % line)
1995 self.prevState.clear()
1996 for key, val in self.currState.items():
1997 self.prevState[key] = val
1998 self.stateLogged = True
2000 def logNewVDI(self, uuid):
2001 if self.stateLogged:
2002 Util.log("Found new VDI when scanning: %s" % uuid)
2004 def _getTreeStr(self, vdi, indent=8):
2005 treeStr = "%s%s\n" % (" " * indent, vdi)
2006 for child in vdi.children:
2007 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
2008 return treeStr
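        # Example (illustrative, not part of the original source): a root VDI "a"
        # with children "b" and "c" renders roughly as:
        #         a
        #             b
        #             c
        # i.e. each level is indented by VDI.STR_TREE_INDENT additional spaces.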
2010 TYPE_FILE = "file"
2011 TYPE_LVHD = "lvhd"
2012 TYPE_LINSTOR = "linstor"
2013 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]
2015 LOCK_RETRY_INTERVAL = 3
2016 LOCK_RETRY_ATTEMPTS = 20
2017 LOCK_RETRY_ATTEMPTS_LOCK = 100
2019 SCAN_RETRY_ATTEMPTS = 3
2021 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM)
2022 TMP_RENAME_PREFIX = "OLD_"
2024 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
2025 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
2027 @staticmethod
2028 def getInstance(uuid, xapiSession, createLock=True, force=False):
2029 xapi = XAPI(xapiSession, uuid)
2030 type = normalizeType(xapi.srRecord["type"])
2031 if type == SR.TYPE_FILE:
2032 return FileSR(uuid, xapi, createLock, force)
2033 elif type == SR.TYPE_LVHD:
2034 return LVMSR(uuid, xapi, createLock, force)
2035 elif type == SR.TYPE_LINSTOR:
2036 return LinstorSR(uuid, xapi, createLock, force)
2037 raise util.SMException("SR type %s not recognized" % type)
2039 def __init__(self, uuid, xapi, createLock, force):
2040 self.logFilter = self.LogFilter(self)
2041 self.uuid = uuid
2042 self.path = ""
2043 self.name = ""
2044 self.vdis = {}
2045 self.vdiTrees = []
2046 self.journaler = None
2047 self.xapi = xapi
2048 self._locked = 0
2049 self._srLock = None
2050         if createLock:
2051 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
2052 else:
2053 Util.log("Requested no SR locking")
2054 self.name = self.xapi.srRecord["name_label"]
2055 self._failedCoalesceTargets = []
2057 if not self.xapi.isPluggedHere():
2058             if force:
2059 Util.log("SR %s not attached on this host, ignoring" % uuid)
2060 else:
2061 if not self.wait_for_plug():
2062 raise util.SMException("SR %s not attached on this host" % uuid)
2064         if force:
2065 Util.log("Not checking if we are Master (SR %s)" % uuid)
2066         elif not self.xapi.isMaster():
2067 raise util.SMException("This host is NOT master, will not run")
2069 self.no_space_candidates = {}
2071 def msg_cleared(self, xapi_session, msg_ref):
2072 try:
2073 msg = xapi_session.xenapi.message.get_record(msg_ref)
2074 except XenAPI.Failure:
2075 return True
2077 return msg is None
2079 def check_no_space_candidates(self):
2080 xapi_session = self.xapi.getSession()
2082 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
2083 if self.no_space_candidates:
2084 if msg_id is None or self.msg_cleared(xapi_session, msg_id):
2085 util.SMlog("Could not coalesce due to a lack of space "
2086 f"in SR {self.uuid}")
2087 msg_body = ("Unable to perform data coalesce due to a lack "
2088 f"of space in SR {self.uuid}")
2089 msg_id = xapi_session.xenapi.message.create(
2090 'SM_GC_NO_SPACE',
2091 3,
2092 "SR",
2093 self.uuid,
2094 msg_body)
2095 xapi_session.xenapi.SR.remove_from_sm_config(
2096 self.xapi.srRef, VDI.DB_GC_NO_SPACE)
2097 xapi_session.xenapi.SR.add_to_sm_config(
2098 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)
2100 for candidate in self.no_space_candidates.values():
2101 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
2102 elif msg_id is not None:
2103 # Everything was coalescable, remove the message
2104 xapi_session.xenapi.message.destroy(msg_id)
2106 def clear_no_space_msg(self, vdi):
2107 msg_id = None
2108 try:
2109 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2110 except XenAPI.Failure:
2111 pass
2113 self.no_space_candidates.pop(vdi.uuid, None)
2114         if msg_id is not None:
2115 vdi.delConfig(VDI.DB_GC_NO_SPACE)
2118 def wait_for_plug(self):
2119 for _ in range(1, 10):
2120 time.sleep(2)
2121 if self.xapi.isPluggedHere():
2122 return True
2123 return False
2125 def gcEnabled(self, refresh=True):
2126 if refresh:
2127 self.xapi.srRecord = \
2128 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2129 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2130 Util.log("GC is disabled for this SR, abort")
2131 return False
2132 return True
2134 def scan(self, force=False) -> None:
2135 """Scan the SR and load VDI info for each VDI. If called repeatedly,
2136 update VDI objects if they already exist"""
2137 pass
2139 def scanLocked(self, force=False):
2140 self.lock()
2141 try:
2142 self.scan(force)
2143 finally:
2144 self.unlock()
2146 def getVDI(self, uuid):
2147 return self.vdis.get(uuid)
2149 def hasWork(self):
2150 if len(self.findGarbage()) > 0:
2151 return True
2152 if self.findCoalesceable():
2153 return True
2154 if self.findLeafCoalesceable():
2155 return True
2156 if self.needUpdateBlockInfo():
2157 return True
2158 return False
2160 def findCoalesceable(self):
2161 """Find a coalesceable VDI. Return a vdi that should be coalesced
2162 (choosing one among all coalesceable candidates according to some
2163 criteria) or None if there is no VDI that could be coalesced"""
2165 candidates = []
2167 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
2168 if srSwitch == "false":
2169 Util.log("Coalesce disabled for this SR")
2170 return candidates
2172 # finish any VDI for which a relink journal entry exists first
2173 journals = self.journaler.getAll(VDI.JRN_RELINK)
2174 for uuid in journals:
2175 vdi = self.getVDI(uuid)
2176 if vdi and vdi not in self._failedCoalesceTargets:
2177 return vdi
2179 for vdi in self.vdis.values():
2180 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
2181 candidates.append(vdi)
2182 Util.log("%s is coalescable" % vdi.uuid)
2184 self.xapi.update_task_progress("coalescable", len(candidates))
2186 # pick one in the tallest tree
2187 treeHeight = dict()
2188 for c in candidates:
2189 height = c.getTreeRoot().getTreeHeight()
2190 if treeHeight.get(height):
2191 treeHeight[height].append(c)
2192 else:
2193 treeHeight[height] = [c]
2195 freeSpace = self.getFreeSpace()
2196 heights = list(treeHeight.keys())
2197 heights.sort(reverse=True)
2198 for h in heights:
2199 for c in treeHeight[h]:
2200 spaceNeeded = c._calcExtraSpaceForCoalescing()
2201 if spaceNeeded <= freeSpace:
2202 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
2203 self.clear_no_space_msg(c)
2204 return c
2205 else:
2206 self.no_space_candidates[c.uuid] = c
2207 Util.log("No space to coalesce %s (free space: %d)" % \
2208 (c, freeSpace))
2209 return None
2211 def getSwitch(self, key):
2212 return self.xapi.srRecord["other_config"].get(key)
2214 def forbiddenBySwitch(self, switch, condition, fail_msg):
2215 srSwitch = self.getSwitch(switch)
2216 ret = False
2217 if srSwitch:
2218 ret = srSwitch == condition
2220 if ret:
2221 Util.log(fail_msg)
2223 return ret
2225 def leafCoalesceForbidden(self):
2226 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2227 "false",
2228 "Coalesce disabled for this SR") or
2229 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2230 VDI.LEAFCLSC_DISABLED,
2231 "Leaf-coalesce disabled for this SR"))
2233 def findLeafCoalesceable(self):
2234 """Find leaf-coalesceable VDIs in each COW tree"""
2236 candidates = []
2237 if self.leafCoalesceForbidden():
2238 return candidates
2240 self.gatherLeafCoalesceable(candidates)
2242 self.xapi.update_task_progress("coalescable", len(candidates))
2244 freeSpace = self.getFreeSpace()
2245 for candidate in candidates:
2246 # check the space constraints to see if leaf-coalesce is actually
2247 # feasible for this candidate
2248 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
2249 spaceNeededLive = spaceNeeded
2250 if spaceNeeded > freeSpace:
2251 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
2252 if candidate.canLiveCoalesce(self.getStorageSpeed()):
2253 spaceNeeded = spaceNeededLive
2255 if spaceNeeded <= freeSpace:
2256 Util.log("Leaf-coalesce candidate: %s" % candidate)
2257 self.clear_no_space_msg(candidate)
2258 return candidate
2259 else:
2260 Util.log("No space to leaf-coalesce %s (free space: %d)" % \
2261 (candidate, freeSpace))
2262 if spaceNeededLive <= freeSpace:
2263                     Util.log("...but enough space if we skip snapshot-coalesce")
2264 candidate.setConfig(VDI.DB_LEAFCLSC,
2265 VDI.LEAFCLSC_OFFLINE)
2266 self.no_space_candidates[candidate.uuid] = candidate
2268 return None
2270 def gatherLeafCoalesceable(self, candidates):
2271 for vdi in self.vdis.values():
2272 if not vdi.isLeafCoalesceable():
2273 continue
2274 if vdi in self._failedCoalesceTargets:
2275 continue
2276 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2277 Util.log("Skipping reset-on-boot %s" % vdi)
2278 continue
2279 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2280 Util.log("Skipping allow_caching=true %s" % vdi)
2281 continue
2282 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2283 Util.log("Leaf-coalesce disabled for %s" % vdi)
2284 continue
2285 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2286 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2287 continue
2288 candidates.append(vdi)
2290 def coalesce(self, vdi, dryRun=False):
2291 """Coalesce vdi onto parent"""
2292 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2293         if dryRun:
2294 return
2296 try:
2297 self._coalesce(vdi)
2298 except util.SMException as e:
2299             if isinstance(e, AbortException):
2300 self.cleanup()
2301 raise
2302 else:
2303 self._failedCoalesceTargets.append(vdi)
2304 Util.logException("coalesce")
2305 Util.log("Coalesce failed, skipping")
2306 self.cleanup()
2308 def coalesceLeaf(self, vdi, dryRun=False):
2309 """Leaf-coalesce vdi onto parent"""
2310 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
2311 if dryRun:
2312 return
2314 try:
2315 uuid = vdi.uuid
2316 try:
2317 # "vdi" object will no longer be valid after this call
2318 self._coalesceLeaf(vdi)
2319 finally:
2320 vdi = self.getVDI(uuid)
2321 if vdi:
2322 vdi.delConfig(vdi.DB_LEAFCLSC)
2323 except AbortException:
2324 self.cleanup()
2325 raise
2326 except (util.SMException, XenAPI.Failure) as e:
2327 self._failedCoalesceTargets.append(vdi)
2328 Util.logException("leaf-coalesce")
2329 Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
2330 self.cleanup()
2332 def garbageCollect(self, dryRun=False):
2333 vdiList = self.findGarbage()
2334 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2335 for vdi in vdiList:
2336 Util.log(" %s" % vdi)
2337 if not dryRun:
2338 self.deleteVDIs(vdiList)
2339 self.cleanupJournals(dryRun)
2341 def findGarbage(self):
2342 vdiList = []
2343 for vdi in self.vdiTrees:
2344 vdiList.extend(vdi.getAllPrunable())
2345 return vdiList
2347 def deleteVDIs(self, vdiList) -> None:
2348 for vdi in vdiList:
2349 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2350 raise AbortException("Aborting due to signal")
2351 Util.log("Deleting unlinked VDI %s" % vdi)
2352 self.deleteVDI(vdi)
2354 def deleteVDI(self, vdi) -> None:
2355 assert(len(vdi.children) == 0)
2356 del self.vdis[vdi.uuid]
2357         if vdi.parent:
2358 vdi.parent.children.remove(vdi)
2359         if vdi in self.vdiTrees:
2360 self.vdiTrees.remove(vdi)
2361 vdi.delete()
2363 def forgetVDI(self, vdiUuid) -> None:
2364 self.xapi.forgetVDI(self.uuid, vdiUuid)
2366 def pauseVDIs(self, vdiList) -> None:
2367 paused = []
2368 failed = False
2369 for vdi in vdiList:
2370 try:
2371 vdi.pause()
2372 paused.append(vdi)
2373 except:
2374 Util.logException("pauseVDIs")
2375 failed = True
2376 break
2378 if failed:
2379 self.unpauseVDIs(paused)
2380 raise util.SMException("Failed to pause VDIs")
2382 def unpauseVDIs(self, vdiList):
2383 failed = False
2384 for vdi in vdiList:
2385 try:
2386 vdi.unpause()
2387 except:
2388 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2389 failed = True
2390 if failed:
2391 raise util.SMException("Failed to unpause VDIs")
2393 def getFreeSpace(self) -> int:
2394 return 0
2396 def cleanup(self):
2397 Util.log("In cleanup")
2398 return
2400 @override
2401 def __str__(self) -> str:
2402 if self.name:
2403 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2404 else:
2405 ret = "%s" % self.uuid
2406 return ret
2408 def lock(self):
2409 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
2410 signal to avoid deadlocking (trying to acquire the SR lock while the
2411 lock is held by a process that is trying to abort us)"""
2412 if not self._srLock:
2413 return
2415 if self._locked == 0:
2416 abortFlag = IPCFlag(self.uuid)
2417 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
2418 if self._srLock.acquireNoblock():
2419 self._locked += 1
2420 return
2421 if abortFlag.test(FLAG_TYPE_ABORT):
2422 raise AbortException("Abort requested")
2423 time.sleep(SR.LOCK_RETRY_INTERVAL)
2424 raise util.SMException("Unable to acquire the SR lock")
2426 self._locked += 1
2428 def unlock(self):
2429         if not self._srLock:
2430 return
2431 assert(self._locked > 0)
2432 self._locked -= 1
2433 if self._locked == 0:
2434 self._srLock.release()
2436 def needUpdateBlockInfo(self) -> bool:
2437 for vdi in self.vdis.values():
2438 if vdi.scanError or len(vdi.children) == 0:
2439 continue
2440 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2441 return True
2442 return False
2444 def updateBlockInfo(self) -> None:
2445 for vdi in self.vdis.values():
2446 if vdi.scanError or len(vdi.children) == 0:
2447 continue
2448 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2449 vdi.updateBlockInfo()
2451 def cleanupCoalesceJournals(self):
2452 """Remove stale coalesce VDI indicators"""
2453 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2454 for uuid, jval in entries.items():
2455 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2457 def cleanupJournals(self, dryRun=False):
2458 """delete journal entries for non-existing VDIs"""
2459 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2460 entries = self.journaler.getAll(t)
2461 for uuid, jval in entries.items():
2462 if self.getVDI(uuid):
2463 continue
2464 if t == SR.JRN_CLONE:
2465 baseUuid, clonUuid = jval.split("_")
2466 if self.getVDI(baseUuid):
2467 continue
2468 Util.log(" Deleting stale '%s' journal entry for %s "
2469 "(%s)" % (t, uuid, jval))
2470 if not dryRun:
2471 self.journaler.remove(t, uuid)
2473 def cleanupCache(self, maxAge=-1) -> int:
2474 return 0
2476 def _hasLeavesAttachedOn(self, vdi: VDI):
2477 leaves = vdi.getAllLeaves()
2478 leaves_vdi = [leaf.uuid for leaf in leaves]
2479 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi)
2481 def _gc_running_file(self, vdi: VDI):
2482 run_file = "gc_running_{}".format(vdi.uuid)
2483 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file)
2485 def _create_running_file(self, vdi: VDI):
2486 with open(self._gc_running_file(vdi), "w") as f:
2487 f.write("1")
2489 def _delete_running_file(self, vdi: VDI):
2490 os.unlink(self._gc_running_file(vdi))
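    # NOTE: the gc_running_<uuid> marker file handled above lives in the host's
    # non-persistent run directory; it flags that a coalesce of that VDI is in
    # progress and is removed again on completion or failure in _coalesce() below.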
2492 def _coalesce(self, vdi: VDI):
2493         if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
2494 # this means we had done the actual coalescing already and just
2495 # need to finish relinking and/or refreshing the children
2496 Util.log("==> Coalesce apparently already done: skipping")
2497 else:
2498 # JRN_COALESCE is used to check which VDI is being coalesced in
2499 # order to decide whether to abort the coalesce. We remove the
2500 # journal as soon as the COW coalesce step is done, because we
2501 # don't expect the rest of the process to take long
2503             if os.path.exists(self._gc_running_file(vdi)):
2504                 util.SMlog("gc_running already exists for {}. Ignoring...".format(self.uuid))
2506 self._create_running_file(vdi)
2508 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
2509 host_refs = self._hasLeavesAttachedOn(vdi)
2510             # TODO: this check for multiple host_refs should be done earlier, in `is_coalesceable`, to avoid bailing out this late every time
2511             if len(host_refs) > 1:
2512 util.SMlog("Not coalesceable, chain activated more than once")
2513 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error
2515 try:
2516                 if host_refs and vdi.cowutil.isCoalesceableOnRemote():
2517                     # Leaf opened on another host: we need to perform an online coalesce
2518 util.SMlog("Remote coalesce for {}".format(vdi.path))
2519 vdi._doCoalesceOnHost(list(host_refs)[0])
2520 else:
2521 util.SMlog("Offline coalesce for {}".format(vdi.path))
2522 vdi._doCoalesce()
2523 except Exception as e:
2524 util.SMlog("EXCEPTION while coalescing: {}".format(e))
2525 self._delete_running_file(vdi)
2526 raise
2528 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
2529 self._delete_running_file(vdi)
2531 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)
2533 # we now need to relink the children: lock the SR to prevent ops
2534 # like SM.clone from manipulating the VDIs we'll be relinking and
2535 # rescan the SR first in case the children changed since the last
2536 # scan
2537 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
2539 self.lock()
2540 try:
2541 vdi.parent._tagChildrenForRelink()
2542 self.scan()
2543             vdi._relinkSkip() # TODO: We could check whether the parent is already the right one before relinking; doing the relink a second time does not seem to cause issues
2544 finally:
2545 self.unlock()
2546 # Reload the children to leave things consistent
2547 vdi.parent._reloadChildren(vdi)
2548 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
2550 self.deleteVDI(vdi)
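    # NOTE: _coalesce() uses two journal entries. JRN_COALESCE only marks which
    # VDI is currently being copied (so an abort request knows what it would
    # interrupt) and is dropped as soon as the data copy finishes; JRN_RELINK
    # persists until the children have been re-parented, so a restarted GC can
    # skip the copy and just finish the relink.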
2552 class CoalesceTracker:
2553 GRACE_ITERATIONS = 2
2554 MAX_ITERATIONS_NO_PROGRESS = 3
2555 MAX_ITERATIONS = 10
2556 MAX_INCREASE_FROM_MINIMUM = 1.2
2557 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2558 " --> Final size {finSize}"
2560 def __init__(self, sr):
2561 self.itsNoProgress = 0
2562 self.its = 0
2563 self.minSize = float("inf")
2564 self.history = []
2565 self.reason = ""
2566 self.startSize = None
2567 self.finishSize = None
2568 self.sr = sr
2569 self.grace_remaining = self.GRACE_ITERATIONS
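        # NOTE: abortCoalesce() below receives the leaf's physical size before
        # and after each snapshot-coalesce iteration. It requests an abort once
        # MAX_ITERATIONS is exceeded, once the size has grown (instead of
        # shrinking or staying flat) in more than MAX_ITERATIONS_NO_PROGRESS
        # iterations, or once the size exceeds MAX_INCREASE_FROM_MINIMUM times
        # the smallest size seen and the GRACE_ITERATIONS allowance is used up.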
2571 def abortCoalesce(self, prevSize, curSize):
2572 self.its += 1
2573 self.history.append(self.HISTORY_STRING.format(its=self.its,
2574 initSize=prevSize,
2575 finSize=curSize))
2577 self.finishSize = curSize
2579 if self.startSize is None:
2580 self.startSize = prevSize
2582 if curSize < self.minSize:
2583 self.minSize = curSize
2585 if prevSize < self.minSize:
2586 self.minSize = prevSize
2588 if self.its == 1:
2589 # Skip evaluating conditions on first iteration
2590 return False
2592 if prevSize < curSize:
2593 self.itsNoProgress += 1
2594 Util.log("No progress, attempt:"
2595 " {attempt}".format(attempt=self.itsNoProgress))
2596 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2597 else:
2598 # We made progress
2599 return False
2601 if self.its > self.MAX_ITERATIONS:
2602 max = self.MAX_ITERATIONS
2603 self.reason = \
2604 "Max iterations ({max}) exceeded".format(max=max)
2605 return True
2607 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2608 max = self.MAX_ITERATIONS_NO_PROGRESS
2609 self.reason = \
2610 "No progress made for {max} iterations".format(max=max)
2611 return True
2613 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2614 if curSize > maxSizeFromMin:
2615 self.grace_remaining -= 1
2616 if self.grace_remaining == 0:
2617 self.reason = "Unexpected bump in size," \
2618 " compared to minimum achieved"
2620 return True
2622 return False
2624 def printSizes(self):
2625 Util.log("Starting size was {size}"
2626 .format(size=self.startSize))
2627 Util.log("Final size was {size}"
2628 .format(size=self.finishSize))
2629 Util.log("Minimum size achieved was {size}"
2630 .format(size=self.minSize))
2632 def printReasoning(self):
2633 Util.log("Aborted coalesce")
2634 for hist in self.history:
2635 Util.log(hist)
2636 Util.log(self.reason)
2637 self.printSizes()
2639 def printSummary(self):
2640 if self.its == 0:
2641 return
2643             if self.reason:
2644 Util.log("Aborted coalesce")
2645 Util.log(self.reason)
2646 else:
2647 Util.log("Coalesce summary")
2649 Util.log(f"Performed {self.its} iterations")
2650 self.printSizes()
2653 def _coalesceLeaf(self, vdi):
2654 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
2655 complete due to external changes, namely vdi_delete and vdi_snapshot
2656 that alter leaf-coalescibility of vdi"""
2657 tracker = self.CoalesceTracker(self)
2658 while not vdi.canLiveCoalesce(self.getStorageSpeed()):
2659 prevSizePhys = vdi.getSizePhys()
2660             if not self._snapshotCoalesce(vdi):
2661 return False
2662 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
2663 tracker.printReasoning()
2664 raise util.SMException("VDI {uuid} could not be coalesced"
2665 .format(uuid=vdi.uuid))
2666 tracker.printSummary()
2667 return self._liveLeafCoalesce(vdi)
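    # NOTE: the loop above repeatedly snapshot-coalesces the leaf: each
    # iteration snapshots it (so the accumulated data moves into a temporary
    # parent), coalesces that parent away, and leaves the leaf holding only the
    # writes made in the meantime. This continues until canLiveCoalesce()
    # accepts the remainder for the final pause-and-coalesce, with
    # CoalesceTracker aborting runs that stop making progress.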
2669 def calcStorageSpeed(self, startTime, endTime, coalescedSize):
2670 speed = None
2671 total_time = endTime - startTime
2672 if total_time > 0:
2673 speed = float(coalescedSize) / float(total_time)
2674 return speed
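    # Example (illustrative, not part of the original source): coalescing 2 GiB
    # of data in 100 s yields a speed of roughly 21 MB/s; recordStorageSpeed()
    # appends it to the per-SR speed log and getStorageSpeed() averages the
    # retained entries, which canLiveCoalesce() takes into account when judging
    # whether a leaf can be coalesced within the live timeout.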
2676 def writeSpeedToFile(self, speed):
2677 content = []
2678 speedFile = None
2679 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2680 self.lock()
2681 try:
2682 Util.log("Writing to file: {myfile}".format(myfile=path))
2683 lines = ""
2684 if not os.path.isfile(path):
2685 lines = str(speed) + "\n"
2686 else:
2687 speedFile = open(path, "r+")
2688 content = speedFile.readlines()
2689 content.append(str(speed) + "\n")
2690 if len(content) > N_RUNNING_AVERAGE:
2691 del content[0]
2692 lines = "".join(content)
2694 util.atomicFileWrite(path, VAR_RUN, lines)
2695 finally:
2696 if speedFile is not None:
2697 speedFile.close()
2698 Util.log("Closing file: {myfile}".format(myfile=path))
2699 self.unlock()
2701 def recordStorageSpeed(self, startTime, endTime, coalescedSize):
2702 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize)
2703 if speed is None:
2704 return
2706 self.writeSpeedToFile(speed)
2708 def getStorageSpeed(self):
2709 speedFile = None
2710 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2711 self.lock()
2712 try:
2713 speed = None
2714 if os.path.isfile(path):
2715 speedFile = open(path)
2716 content = speedFile.readlines()
2717 try:
2718 content = [float(i) for i in content]
2719 except ValueError:
2720                         Util.log("Something bad in the speed log: {log}".
2721                                  format(log=content))
2722 return speed
2724 if len(content):
2725 speed = sum(content) / float(len(content))
2726                     if speed <= 0:
2727 # Defensive, should be impossible.
2728 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2729 format(speed=speed, uuid=self.uuid))
2730 speed = None
2731 else:
2732 Util.log("Speed file empty for SR: {uuid}".
2733 format(uuid=self.uuid))
2734 else:
2735 Util.log("Speed log missing for SR: {uuid}".
2736 format(uuid=self.uuid))
2737 return speed
2738 finally:
2739 if not (speedFile is None):
2740 speedFile.close()
2741 self.unlock()
2743 def _snapshotCoalesce(self, vdi):
2744 # Note that because we are not holding any locks here, concurrent SM
2745 # operations may change this tree under our feet. In particular, vdi
2746 # can be deleted, or it can be snapshotted.
2747 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2748 Util.log("Single-snapshotting %s" % vdi)
2749 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2750 try:
2751 ret = self.xapi.singleSnapshotVDI(vdi)
2752 Util.log("Single-snapshot returned: %s" % ret)
2753 except XenAPI.Failure as e:
2754 if util.isInvalidVDI(e):
2755 Util.log("The VDI appears to have been concurrently deleted")
2756 return False
2757 raise
2758 self.scanLocked()
2759 tempSnap = vdi.parent
2760 if not tempSnap.isCoalesceable():
2761 Util.log("The VDI appears to have been concurrently snapshotted")
2762 return False
2763 Util.log("Coalescing parent %s" % tempSnap)
2764 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2765 sizePhys = vdi.getSizePhys()
2766 self._coalesce(tempSnap)
2767 if not vdi.isLeafCoalesceable():
2768             Util.log("The VDI tree appears to have been altered in the meantime")
2769 return False
2770 return True
2772 def _liveLeafCoalesce(self, vdi: VDI) -> bool:
2773 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
2774 self.lock()
2775 try:
2776 self.scan()
2777 if not self.getVDI(vdi.uuid):
2778 Util.log("The VDI appears to have been deleted meanwhile")
2779 return False
2780 if not vdi.isLeafCoalesceable():
2781 Util.log("The VDI is no longer leaf-coalesceable")
2782 return False
2784 uuid = vdi.uuid
2785 vdi.pause(failfast=True)
2786 try:
2787 try:
2788 # "vdi" object will no longer be valid after this call
2789 self._create_running_file(vdi)
2790 self._doCoalesceLeaf(vdi)
2791 except:
2792 Util.logException("_doCoalesceLeaf")
2793 self._handleInterruptedCoalesceLeaf()
2794 raise
2795 finally:
2796 vdi = self.getVDI(uuid)
2797 if vdi:
2798 vdi.ensureUnpaused()
2799 self._delete_running_file(vdi)
2800 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
2801 if vdiOld:
2802 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2803 self.deleteVDI(vdiOld)
2804 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2805 finally:
2806 self.cleanup()
2807 self.unlock()
2808 self.logFilter.logState()
2809 return True
2811 def _doCoalesceLeaf(self, vdi: VDI):
2812 """Actual coalescing of a leaf VDI onto parent. Must be called in an
2813 offline/atomic context"""
2814 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
2815 self._prepareCoalesceLeaf(vdi)
2816 vdi.parent._setHidden(False)
2817 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
2818 vdi.validate(True)
2819 vdi.parent.validate(True)
2820 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
2821 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
2822         if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
2823 Util.log("Leaf-coalesce forced, will not use timeout")
2824 timeout = 0
2825 vdi._coalesceCowImage(timeout)
2826 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
2827 vdi.parent.validate(True)
2828 #vdi._verifyContents(timeout / 2)
2830 # rename
2831 vdiUuid = vdi.uuid
2832 oldName = vdi.fileName
2833 origParentUuid = vdi.parent.uuid
2834 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
2835 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
2836 vdi.parent.rename(vdiUuid)
2837 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
2838 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
2840 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
2841 # garbage
2843 # update the VDI record
2844         if vdi.parent.vdi_type == VdiType.RAW:
2845 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
2846 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
2847 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
2849 self._updateNode(vdi)
2851 # delete the obsolete leaf & inflate the parent (in that order, to
2852 # minimize free space requirements)
2853 parent = vdi.parent
2854 vdi._setHidden(True)
2855 vdi.parent.children = []
2856 vdi.parent = None
2858 if parent.parent is None:
2859 parent.delConfig(VDI.DB_VDI_PARENT)
2861 extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
2862 freeSpace = self.getFreeSpace()
2863         if freeSpace < extraSpace:
2864 # don't delete unless we need the space: deletion is time-consuming
2865 # because it requires contacting the slaves, and we're paused here
2866 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2867 self.deleteVDI(vdi)
2868 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2870 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
2871 self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
2873 self.forgetVDI(origParentUuid)
2874 self._finishCoalesceLeaf(parent)
2875 self._updateSlavesOnResize(parent)
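    # NOTE: the rename dance above (child -> OLD_<uuid>, then parent -> <uuid>)
    # lets the fully coalesced parent take over the leaf's identity without
    # changing the VDI record. The JRN_LEAF entry, together with whether the
    # original parent or the OLD_-prefixed child still exists, is what
    # _handleInterruptedCoalesceLeaf() uses to decide between undoing and
    # finishing an interrupted run.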
2877 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2878 assert(VdiType.isCowImage(parent.vdi_type))
2879 extra = child.getSizePhys() - parent.getSizePhys()
2880         if extra < 0:
2881 extra = 0
2882 return extra
2884 def _prepareCoalesceLeaf(self, vdi) -> None:
2885 pass
2887 def _updateNode(self, vdi) -> None:
2888 pass
2890 def _finishCoalesceLeaf(self, parent) -> None:
2891 pass
2893 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
2894 pass
2896 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
2897 pass
2899 def _updateSlavesOnResize(self, vdi) -> None:
2900 pass
2902 def _removeStaleVDIs(self, uuidsPresent) -> None:
2903 for uuid in list(self.vdis.keys()):
2904 if not uuid in uuidsPresent:
2905 Util.log("VDI %s disappeared since last scan" % \
2906 self.vdis[uuid])
2907 del self.vdis[uuid]
2909 def _handleInterruptedCoalesceLeaf(self) -> None:
2910 """An interrupted leaf-coalesce operation may leave the COW tree in an
2911 inconsistent state. If the old-leaf VDI is still present, we revert the
2912 operation (in case the original error is persistent); otherwise we must
2913 finish the operation"""
2914 pass
2916 def _buildTree(self, force):
2917 self.vdiTrees = []
2918 for vdi in self.vdis.values():
2919 if vdi.parentUuid:
2920 parent = self.getVDI(vdi.parentUuid)
2921 if not parent:
2922 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
2923 self.vdiTrees.append(vdi)
2924 continue
2925 if force:
2926 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
2927 (vdi.parentUuid, vdi.uuid))
2928 self.vdiTrees.append(vdi)
2929 continue
2930 else:
2931 raise util.SMException("Parent VDI %s of %s not " \
2932 "found" % (vdi.parentUuid, vdi.uuid))
2933 vdi.parent = parent
2934 parent.children.append(vdi)
2935 else:
2936 self.vdiTrees.append(vdi)
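    # NOTE: _buildTree() turns the flat uuid->VDI map into the forest walked by
    # the GC. VDIs whose parent cannot be found are still accepted as tree roots
    # when they carry the temporary OLD_ rename prefix or when force is set;
    # otherwise the scan raises, since the chain cannot be reconstructed.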
2939class FileSR(SR):
2940 TYPE = SR.TYPE_FILE
2941 CACHE_FILE_EXT = ".vhdcache"
2942 # cache cleanup actions
2943 CACHE_ACTION_KEEP = 0
2944 CACHE_ACTION_REMOVE = 1
2945 CACHE_ACTION_REMOVE_IF_INACTIVE = 2
2947 def __init__(self, uuid, xapi, createLock, force):
2948 SR.__init__(self, uuid, xapi, createLock, force)
2949 self.path = "/var/run/sr-mount/%s" % self.uuid
2950 self.journaler = fjournaler.Journaler(self.path)
2952 @override
2953 def scan(self, force=False) -> None:
2954 if not util.pathexists(self.path):
2955 raise util.SMException("directory %s not found!" % self.uuid)
2957 uuidsPresent: List[str] = []
2959 for vdi_type in VDI_COW_TYPES:
2960 scan_result = self._scan(vdi_type, force)
2961 for uuid, image_info in scan_result.items():
2962 vdi = self.getVDI(uuid)
2963 if not vdi:
2964 self.logFilter.logNewVDI(uuid)
2965 vdi = FileVDI(self, uuid, vdi_type)
2966 self.vdis[uuid] = vdi
2967 vdi.load(image_info)
2968 uuidsPresent.extend(scan_result.keys())
2970 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
2971 for rawName in rawList:
2972 uuid = FileVDI.extractUuid(rawName)
2973 uuidsPresent.append(uuid)
2974 vdi = self.getVDI(uuid)
2975 if not vdi:
2976 self.logFilter.logNewVDI(uuid)
2977 vdi = FileVDI(self, uuid, VdiType.RAW)
2978 self.vdis[uuid] = vdi
2979 self._removeStaleVDIs(uuidsPresent)
2980 self._buildTree(force)
2981 self.logFilter.logState()
2982 self._handleInterruptedCoalesceLeaf()
2984 @override
2985 def getFreeSpace(self) -> int:
2986 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2988 @override
2989 def deleteVDIs(self, vdiList) -> None:
2990 rootDeleted = False
2991 for vdi in vdiList:
2992 if not vdi.parent:
2993 rootDeleted = True
2994 break
2995 SR.deleteVDIs(self, vdiList)
2996 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
2997 self.xapi.markCacheSRsDirty()
2999 @override
3000 def cleanupCache(self, maxAge=-1) -> int:
3001 """Clean up IntelliCache cache files. Caches for leaf nodes are
3002 removed when the leaf node no longer exists or its allow-caching
3003 attribute is not set. Caches for parent nodes are removed when the
3004 parent node no longer exists or it hasn't been used in more than
3005 <maxAge> hours.
3006 Return number of caches removed.
3007 """
3008 numRemoved = 0
3009 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
3010 Util.log("Found %d cache files" % len(cacheFiles))
3011 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
3012 for cacheFile in cacheFiles:
3013 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
3014 action = self.CACHE_ACTION_KEEP
3015 rec = self.xapi.getRecordVDI(uuid)
3016 if not rec:
3017 Util.log("Cache %s: VDI doesn't exist" % uuid)
3018 action = self.CACHE_ACTION_REMOVE
3019 elif rec["managed"] and not rec["allow_caching"]:
3020 Util.log("Cache %s: caching disabled" % uuid)
3021 action = self.CACHE_ACTION_REMOVE
3022 elif not rec["managed"] and maxAge >= 0:
3023 lastAccess = datetime.datetime.fromtimestamp( \
3024 os.path.getatime(os.path.join(self.path, cacheFile)))
3025 if lastAccess < cutoff:
3026 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
3027 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
3029 if action == self.CACHE_ACTION_KEEP:
3030 Util.log("Keeping cache %s" % uuid)
3031 continue
3033 lockId = uuid
3034 parentUuid = None
3035 if rec and rec["managed"]:
3036 parentUuid = rec["sm_config"].get("vhd-parent")
3037 if parentUuid:
3038 lockId = parentUuid
3040 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
3041 cacheLock.acquire()
3042 try:
3043 if self._cleanupCache(uuid, action):
3044 numRemoved += 1
3045 finally:
3046 cacheLock.release()
3047 return numRemoved
3049 def _cleanupCache(self, uuid, action):
3050 assert(action != self.CACHE_ACTION_KEEP)
3051 rec = self.xapi.getRecordVDI(uuid)
3052 if rec and rec["allow_caching"]:
3053 Util.log("Cache %s appears to have become valid" % uuid)
3054 return False
3056 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
3057 tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
3058 if tapdisk:
3059 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
3060 Util.log("Cache %s still in use" % uuid)
3061 return False
3062 Util.log("Shutting down tapdisk for %s" % fullPath)
3063 tapdisk.shutdown()
3065 Util.log("Deleting file %s" % fullPath)
3066 os.unlink(fullPath)
3067 return True
3069 def _isCacheFileName(self, name):
3070 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
3071 name.endswith(self.CACHE_FILE_EXT)
3073 def _scan(self, vdi_type, force):
3074 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3075 error = False
3076 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
3077 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
3078 for uuid, vdiInfo in scan_result.items():
3079 if vdiInfo.error:
3080 error = True
3081 break
3082 if not error:
3083 return scan_result
3084 Util.log("Scan error on attempt %d" % i)
3085 if force:
3086 return scan_result
3087 raise util.SMException("Scan error")
3089 @override
3090 def deleteVDI(self, vdi) -> None:
3091 self._checkSlaves(vdi)
3092 SR.deleteVDI(self, vdi)
3094 def _checkSlaves(self, vdi):
3095 onlineHosts = self.xapi.getOnlineHosts()
3096 abortFlag = IPCFlag(self.uuid)
3097 for pbdRecord in self.xapi.getAttachedPBDs():
3098 hostRef = pbdRecord["host"]
3099 if hostRef == self.xapi._hostRef:
3100 continue
3101 if abortFlag.test(FLAG_TYPE_ABORT):
3102 raise AbortException("Aborting due to signal")
3103 try:
3104 self._checkSlave(hostRef, vdi)
3105 except util.CommandException:
3106 if hostRef in onlineHosts:
3107 raise
3109 def _checkSlave(self, hostRef, vdi):
3110 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
3111 Util.log("Checking with slave: %s" % repr(call))
3112 _host = self.xapi.session.xenapi.host
3113 text = _host.call_plugin( * call)
3115 @override
3116 def _handleInterruptedCoalesceLeaf(self) -> None:
3117 entries = self.journaler.getAll(VDI.JRN_LEAF)
3118 for uuid, parentUuid in entries.items():
3119 fileList = os.listdir(self.path)
3120 childName = uuid + VdiTypeExtension.VHD
3121 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
3122 parentName1 = parentUuid + VdiTypeExtension.VHD
3123 parentName2 = parentUuid + VdiTypeExtension.RAW
3124 parentPresent = (parentName1 in fileList or parentName2 in fileList)
3125 if parentPresent or tmpChildName in fileList:
3126 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3127 else:
3128 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3129 self.journaler.remove(VDI.JRN_LEAF, uuid)
3130 vdi = self.getVDI(uuid)
3131 if vdi:
3132 vdi.ensureUnpaused()
3134 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3135 Util.log("*** UNDO LEAF-COALESCE")
3136 parent = self.getVDI(parentUuid)
3137 if not parent:
3138 parent = self.getVDI(childUuid)
3139 if not parent:
3140 raise util.SMException("Neither %s nor %s found" % \
3141 (parentUuid, childUuid))
3142 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3143 parent.rename(parentUuid)
3144 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3146 child = self.getVDI(childUuid)
3147 if not child:
3148 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3149 if not child:
3150 raise util.SMException("Neither %s nor %s found" % \
3151 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3152 Util.log("Renaming child back to %s" % childUuid)
3153 child.rename(childUuid)
3154 Util.log("Updating the VDI record")
3155 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3156 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3157 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3159 if child.isHidden():
3160 child._setHidden(False)
3161 if not parent.isHidden():
3162 parent._setHidden(True)
3163 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3164 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3165 Util.log("*** leaf-coalesce undo successful")
3166 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3167 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3169 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3170 Util.log("*** FINISH LEAF-COALESCE")
3171 vdi = self.getVDI(childUuid)
3172 if not vdi:
3173 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
3174 raise util.SMException("VDI %s not found" % childUuid)
3175 try:
3176 self.forgetVDI(parentUuid)
3177 except XenAPI.Failure:
3178 Util.logException('_finishInterruptedCoalesceLeaf')
3179 pass
3180 self._updateSlavesOnResize(vdi)
3181 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3182 Util.log("*** finished leaf-coalesce successfully")
3185class LVMSR(SR):
3186 TYPE = SR.TYPE_LVHD
3187 SUBTYPES = ["lvhdoiscsi", "lvhdohba"]
3189 def __init__(self, uuid, xapi, createLock, force):
3190 SR.__init__(self, uuid, xapi, createLock, force)
3191 self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
3192 self.path = os.path.join(VG_LOCATION, self.vgName)
3194 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
3195 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
3196 lvm_conf = other_conf.get('lvm-conf') if other_conf else None
3197 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)
3199 self.lvActivator = LVActivator(self.uuid, self.lvmCache)
3200 self.journaler = journaler.Journaler(self.lvmCache)
3202 @override
3203 def deleteVDI(self, vdi) -> None:
3204 if self.lvActivator.get(vdi.uuid, False):
3205 self.lvActivator.deactivate(vdi.uuid, False)
3206 self._checkSlaves(vdi)
3207 SR.deleteVDI(self, vdi)
3209 @override
3210 def forgetVDI(self, vdiUuid) -> None:
3211 SR.forgetVDI(self, vdiUuid)
3212 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
3213 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
3215 @override
3216 def getFreeSpace(self) -> int:
3217 stats = lvutil._getVGstats(self.vgName)
3218 return stats['physical_size'] - stats['physical_utilisation']
3220 @override
3221 def cleanup(self):
3222 if not self.lvActivator.deactivateAll():
3223 Util.log("ERROR deactivating LVs while cleaning up")
3225 @override
3226 def needUpdateBlockInfo(self) -> bool:
3227 for vdi in self.vdis.values():
3228 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3229 continue
3230 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3231 return True
3232 return False
3234 @override
3235 def updateBlockInfo(self) -> None:
3236 numUpdated = 0
3237 for vdi in self.vdis.values():
3238 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3239 continue
3240 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3241 vdi.updateBlockInfo()
3242 numUpdated += 1
3243 if numUpdated:
3244 # deactivate the LVs back sooner rather than later. If we don't
3245 # now, by the time this thread gets to deactivations, another one
3246 # might have leaf-coalesced a node and deleted it, making the child
3247 # inherit the refcount value and preventing the correct decrement
3248 self.cleanup()
3250 @override
3251 def scan(self, force=False) -> None:
3252 vdis = self._scan(force)
3253 for uuid, vdiInfo in vdis.items():
3254 vdi = self.getVDI(uuid)
3255 if not vdi:
3256 self.logFilter.logNewVDI(uuid)
3257 vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
3258 self.vdis[uuid] = vdi
3259 vdi.load(vdiInfo)
3260 self._removeStaleVDIs(vdis.keys())
3261 self._buildTree(force)
3262 self.logFilter.logState()
3263 self._handleInterruptedCoalesceLeaf()
3265 def _scan(self, force):
3266 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3267 error = False
3268 self.lvmCache.refresh()
3269 vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
3270 for uuid, vdiInfo in vdis.items():
3271 if vdiInfo.scanError:
3272 error = True
3273 break
3274 if not error:
3275 return vdis
3276 Util.log("Scan error, retrying (%d)" % i)
3277 if force:
3278 return vdis
3279 raise util.SMException("Scan error")
3281 @override
3282 def _removeStaleVDIs(self, uuidsPresent) -> None:
3283 for uuid in list(self.vdis.keys()):
3284 if not uuid in uuidsPresent:
3285 Util.log("VDI %s disappeared since last scan" % \
3286 self.vdis[uuid])
3287 del self.vdis[uuid]
3288 if self.lvActivator.get(uuid, False):
3289 self.lvActivator.remove(uuid, False)
3291 @override
3292 def _liveLeafCoalesce(self, vdi) -> bool:
3293 """If the parent is raw and the child was resized (virt. size), then
3294 we'll need to resize the parent, which can take a while due to zeroing
3295 out of the extended portion of the LV. Do it before pausing the child
3296 to avoid a protracted downtime"""
3297 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
3298 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3299 vdi.parent._increaseSizeVirt(vdi.sizeVirt)
3301 return SR._liveLeafCoalesce(self, vdi)
3303 @override
3304 def _prepareCoalesceLeaf(self, vdi) -> None:
3305 vdi._activateChain()
3306 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3307 vdi.deflate()
3308 vdi.inflateParentForCoalesce()
3310 @override
3311 def _updateNode(self, vdi) -> None:
3312 # fix the refcounts: the remaining node should inherit the binary
3313 # refcount from the leaf (because if it was online, it should remain
3314 # refcounted as such), but the normal refcount from the parent (because
3315 # this node is really the parent node) - minus 1 if it is online (since
3316 # non-leaf nodes increment their normal counts when they are online and
3317 # we are now a leaf, storing that 1 in the binary refcount).
3318 ns = NS_PREFIX_LVM + self.uuid
3319 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
3320 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
3321 pCnt = pCnt - cBcnt
3322 assert(pCnt >= 0)
3323 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
3325 @override
3326 def _finishCoalesceLeaf(self, parent) -> None:
3327 if not parent.isSnapshot() or parent.isAttachedRW():
3328 parent.inflateFully()
3329 else:
3330 parent.deflate()
3332 @override
3333 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3334 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV
3336 @override
3337 def _handleInterruptedCoalesceLeaf(self) -> None:
3338 entries = self.journaler.getAll(VDI.JRN_LEAF)
3339 for uuid, parentUuid in entries.items():
3340 undo = False
3341 for prefix in LV_PREFIX.values():
3342 parentLV = prefix + parentUuid
3343 undo = self.lvmCache.checkLV(parentLV)
3344 if undo:
3345 break
3347 if not undo:
3348 for prefix in LV_PREFIX.values():
3349 tmpChildLV = prefix + uuid
3350 undo = self.lvmCache.checkLV(tmpChildLV)
3351 if undo:
3352 break
3354 if undo:
3355 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3356 else:
3357 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3358 self.journaler.remove(VDI.JRN_LEAF, uuid)
3359 vdi = self.getVDI(uuid)
3360 if vdi:
3361 vdi.ensureUnpaused()
3363 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3364 Util.log("*** UNDO LEAF-COALESCE")
3365 parent = self.getVDI(parentUuid)
3366 if not parent:
3367 parent = self.getVDI(childUuid)
3368 if not parent:
3369 raise util.SMException("Neither %s nor %s found" % \
3370 (parentUuid, childUuid))
3371 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3372 parent.rename(parentUuid)
3373 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3375 child = self.getVDI(childUuid)
3376 if not child:
3377 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3378 if not child:
3379 raise util.SMException("Neither %s nor %s found" % \
3380 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3381 Util.log("Renaming child back to %s" % childUuid)
3382 child.rename(childUuid)
3383 Util.log("Updating the VDI record")
3384 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3385 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3386 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3388 # refcount (best effort - assume that it had succeeded if the
3389 # second rename succeeded; if not, this adjustment will be wrong,
3390 # leading to a non-deactivation of the LV)
3391 ns = NS_PREFIX_LVM + self.uuid
3392 cCnt, cBcnt = RefCounter.check(child.uuid, ns)
3393 pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
3394 pCnt = pCnt + cBcnt
3395 RefCounter.set(parent.uuid, pCnt, 0, ns)
3396 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
3398 parent.deflate()
3399 child.inflateFully()
3400 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
3401 if child.isHidden():
3402 child._setHidden(False)
3403 if not parent.isHidden():
3404 parent._setHidden(True)
3405 if not parent.lvReadonly:
3406 self.lvmCache.setReadonly(parent.fileName, True)
3407 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3408 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3409 Util.log("*** leaf-coalesce undo successful")
3410 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3411 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3413 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3414 Util.log("*** FINISH LEAF-COALESCE")
3415 vdi = self.getVDI(childUuid)
3416 if not vdi:
3417 raise util.SMException("VDI %s not found" % childUuid)
3418 vdi.inflateFully()
3419 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
3420 try:
3421 self.forgetVDI(parentUuid)
3422 except XenAPI.Failure:
3423 pass
3424 self._updateSlavesOnResize(vdi)
3425 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3426 Util.log("*** finished leaf-coalesce successfully")
3428 def _checkSlaves(self, vdi):
3429 """Confirm with all slaves in the pool that 'vdi' is not in use. We
3430 try to check all slaves, including those that the Agent believes are
3431 offline, but ignore failures for offline hosts. This is to avoid cases
3432 where the Agent thinks a host is offline but the host is up."""
3433 args = {"vgName": self.vgName,
3434 "action1": "deactivateNoRefcount",
3435 "lvName1": vdi.fileName,
3436 "action2": "cleanupLockAndRefcount",
3437 "uuid2": vdi.uuid,
3438 "ns2": NS_PREFIX_LVM + self.uuid}
3439 onlineHosts = self.xapi.getOnlineHosts()
3440 abortFlag = IPCFlag(self.uuid)
3441 for pbdRecord in self.xapi.getAttachedPBDs():
3442 hostRef = pbdRecord["host"]
3443 if hostRef == self.xapi._hostRef:
3444 continue
3445 if abortFlag.test(FLAG_TYPE_ABORT):
3446 raise AbortException("Aborting due to signal")
3447 Util.log("Checking with slave %s (path %s)" % (
3448 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
3449 try:
3450 self.xapi.ensureInactive(hostRef, args)
3451 except XenAPI.Failure:
3452 if hostRef in onlineHosts:
3453 raise
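# The args dicts built here and in the helpers below follow the numbered-step
# convention of the on-slave plugin: a flat dict of "actionN"/"lvNameN"/"uuidN"
# keys which the slave is expected to execute in order. A minimal sketch of
# such a call (placeholder names; "VHD-<uuid>" is a hypothetical LV name):
#
#   args = {"vgName": vg_name,
#           "action1": "deactivateNoRefcount", "lvName1": "VHD-<uuid>",
#           "action2": "refresh", "lvName2": "VHD-<uuid>"}
#   session.xenapi.host.call_plugin(host_ref, PLUGIN_ON_SLAVE, "multi", args)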
3455 @override
3456 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
3457 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
3458 if not slaves:
3459 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
3460 child)
3461 return
3463 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid
3464 args = {"vgName": self.vgName,
3465 "action1": "deactivateNoRefcount",
3466 "lvName1": tmpName,
3467 "action2": "deactivateNoRefcount",
3468 "lvName2": child.fileName,
3469 "action3": "refresh",
3470 "lvName3": child.fileName,
3471 "action4": "refresh",
3472 "lvName4": parent.fileName}
3473 for slave in slaves:
3474 Util.log("Updating %s, %s, %s on slave %s" % \
3475 (tmpName, child.fileName, parent.fileName,
3476 self.xapi.getRecordHost(slave)['hostname']))
3477 text = self.xapi.session.xenapi.host.call_plugin( \
3478 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3479 Util.log("call-plugin returned: '%s'" % text)
3481 @override
3482 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
3483 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
3484 if not slaves:
3485 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
3486 return
3488 args = {"vgName": self.vgName,
3489 "action1": "deactivateNoRefcount",
3490 "lvName1": oldNameLV,
3491 "action2": "refresh",
3492 "lvName2": vdi.fileName,
3493 "action3": "cleanupLockAndRefcount",
3494 "uuid3": origParentUuid,
3495 "ns3": NS_PREFIX_LVM + self.uuid}
3496 for slave in slaves:
3497 Util.log("Updating %s to %s on slave %s" % \
3498 (oldNameLV, vdi.fileName,
3499 self.xapi.getRecordHost(slave)['hostname']))
3500 text = self.xapi.session.xenapi.host.call_plugin( \
3501 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3502 Util.log("call-plugin returned: '%s'" % text)
3504 @override
3505 def _updateSlavesOnResize(self, vdi) -> None:
3506 uuids = [x.uuid for x in vdi.getAllLeaves()]
3507 slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
3508 if not slaves:
3509 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
3510 return
3511 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
3512 vdi.fileName, vdi.uuid, slaves)
3515class LinstorSR(SR):
3516 TYPE = SR.TYPE_LINSTOR
3518 def __init__(self, uuid, xapi, createLock, force):
3519 if not LINSTOR_AVAILABLE:
3520 raise util.SMException(
3521 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
3522 )
3524 SR.__init__(self, uuid, xapi, createLock, force)
3525 self.path = LinstorVolumeManager.DEV_ROOT_PATH
3527 class LinstorProxy:
3528 def __init__(self, sr: LinstorSR) -> None:
3529 self.sr = sr
3531 def __getattr__(self, attr: str) -> Any:
3532 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
3533 return getattr(self.sr._linstor, attr)
3535 self._linstor_proxy = LinstorProxy(self)
3536 self._reloadLinstor(journaler_only=True)
3538 @override
3539 def deleteVDI(self, vdi) -> None:
3540 self._checkSlaves(vdi)
3541 SR.deleteVDI(self, vdi)
3543 @override
3544 def getFreeSpace(self) -> int:
3545 return self._linstor.max_volume_size_allowed
3547 @override
3548 def scan(self, force=False) -> None:
3549 all_vdi_info = self._scan(force)
3550 for uuid, vdiInfo in all_vdi_info.items():
3551 # When vdiInfo is None, the VDI is RAW.
3552 vdi = self.getVDI(uuid)
3553 if not vdi:
3554 self.logFilter.logNewVDI(uuid)
3555 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)
3556 self.vdis[uuid] = vdi
3557 if vdiInfo:
3558 vdi.load(vdiInfo)
3559 self._removeStaleVDIs(all_vdi_info.keys())
3560 self._buildTree(force)
3561 self.logFilter.logState()
3562 self._handleInterruptedCoalesceLeaf()
3564 @override
3565 def pauseVDIs(self, vdiList) -> None:
3566 self._linstor.ensure_volume_list_is_not_locked(
3567 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3568 )
3569 return super(LinstorSR, self).pauseVDIs(vdiList)
3571 def _reloadLinstor(self, journaler_only=False):
3572 session = self.xapi.session
3573 host_ref = util.get_this_host_ref(session)
3574 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
3576 pbd = util.find_my_pbd(session, host_ref, sr_ref)
3577 if pbd is None:
3578 raise util.SMException('Failed to find PBD')
3580 dconf = session.xenapi.PBD.get_device_config(pbd)
3581 group_name = dconf['group-name']
3583 controller_uri = get_controller_uri()
3584 self.journaler = LinstorJournaler(
3585 controller_uri, group_name, logger=util.SMlog
3586 )
3588 if journaler_only:
3589 return
3591 self._linstor = LinstorVolumeManager(
3592 controller_uri,
3593 group_name,
3594 repair=True,
3595 logger=util.SMlog
3596 )
3598 def _scan(self, force):
3599 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3600 self._reloadLinstor()
3601 error = False
3602 try:
3603 all_vdi_info = self._load_vdi_info()
3604 for uuid, vdiInfo in all_vdi_info.items():
3605 if vdiInfo and vdiInfo.error:
3606 error = True
3607 break
3608 if not error:
3609 return all_vdi_info
3610 Util.log('Scan error, retrying ({})'.format(i))
3611 except Exception as e:
3612 Util.log('Scan exception, retrying ({}): {}'.format(i, e))
3613 Util.log(traceback.format_exc())
3615 if force:
3616 return all_vdi_info
3617 raise util.SMException('Scan error')
3619 def _load_vdi_info(self):
3620 all_vdi_info = {}
3622 # TODO: Ensure metadata contains the right info.
3624 all_volume_info = self._linstor.get_volumes_with_info()
3625 volumes_metadata = self._linstor.get_volumes_with_metadata()
3626 for vdi_uuid, volume_info in all_volume_info.items():
3627 vdi_type = VdiType.RAW
3628 try:
3629 volume_metadata = volumes_metadata[vdi_uuid]
3630 if not volume_info.name and not volume_metadata:
3631 continue # Ignore it, probably deleted.
3633 if vdi_uuid.startswith('DELETED_'):
3634 # Assume it's really a RAW volume of a failed snap without COW header/footer.
3635 # We must remove this VDI now, without adding it to the VDI list.
3636 # Otherwise `Relinking` calls and other actions can be launched on it.
3637 # We don't want that...
3638 Util.log('Deleting bad VDI {}'.format(vdi_uuid))
3640 self.lock()
3641 try:
3642 self._linstor.destroy_volume(vdi_uuid)
3643 try:
3644 self.forgetVDI(vdi_uuid)
3645 except:
3646 pass
3647 except Exception as e:
3648 Util.log('Cannot delete bad VDI: {}'.format(e))
3649 finally:
3650 self.unlock()
3651 continue
3653 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
3654 volume_name = self._linstor.get_volume_name(vdi_uuid)
3655 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
3656 # Always RAW!
3657 info = None
3658 elif VdiType.isCowImage(vdi_type):
3659 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid)
3660 else:
3661 # Ensure it's not a COW image...
3662 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type)
3663 try:
3664 info = linstorcowutil.get_info(vdi_uuid)
3665 except:
3666 try:
3667 linstorcowutil.force_repair(
3668 self._linstor.get_device_path(vdi_uuid)
3669 )
3670 info = linstorcowutil.get_info(vdi_uuid)
3671 except:
3672 info = None
3674 except Exception as e:
3675 Util.log(
3676 ' [VDI {}: failed to load VDI info]: {}'
3677 .format(vdi_uuid, e)
3678 )
3679 info = CowImageInfo(vdi_uuid)
3680 info.error = 1
3682 if info:
3683 info.vdiType = vdi_type
3685 all_vdi_info[vdi_uuid] = info
3687 return all_vdi_info
3689 @override
3690 def _prepareCoalesceLeaf(self, vdi) -> None:
3691 vdi._activateChain()
3692 vdi.deflate()
3693 vdi._inflateParentForCoalesce()
3695 @override
3696 def _finishCoalesceLeaf(self, parent) -> None:
3697 if not parent.isSnapshot() or parent.isAttachedRW():
3698 parent.inflateFully()
3699 else:
3700 parent.deflate()
3702 @override
3703 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3704 return LinstorCowUtil(
3705 self.xapi.session, self._linstor, parent.vdi_type
3706 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()
3708 def _hasValidDevicePath(self, uuid):
3709 try:
3710 self._linstor.get_device_path(uuid)
3711 except Exception:
3712 # TODO: Maybe log exception.
3713 return False
3714 return True
3716 @override
3717 def _liveLeafCoalesce(self, vdi) -> bool:
3718 self.lock()
3719 try:
3720 self._linstor.ensure_volume_is_not_locked(
3721 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3722 )
3723 return super(LinstorSR, self)._liveLeafCoalesce(vdi)
3724 finally:
3725 self.unlock()
3727 @override
3728 def _handleInterruptedCoalesceLeaf(self) -> None:
3729 entries = self.journaler.get_all(VDI.JRN_LEAF)
3730 for uuid, parentUuid in entries.items():
3731 if self._hasValidDevicePath(parentUuid) or \
3732 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
3733 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3734 else:
3735 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3736 self.journaler.remove(VDI.JRN_LEAF, uuid)
3737 vdi = self.getVDI(uuid)
3738 if vdi:
3739 vdi.ensureUnpaused()
3741 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3742 Util.log('*** UNDO LEAF-COALESCE')
3743 parent = self.getVDI(parentUuid)
3744 if not parent:
3745 parent = self.getVDI(childUuid)
3746 if not parent:
3747 raise util.SMException(
3748 'Neither {} nor {} found'.format(parentUuid, childUuid)
3749 )
3750 Util.log(
3751 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
3752 )
3753 parent.rename(parentUuid)
3755 child = self.getVDI(childUuid)
3756 if not child:
3757 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3758 if not child:
3759 raise util.SMException(
3760 'Neither {} nor {} found'.format(
3761 childUuid, self.TMP_RENAME_PREFIX + childUuid
3762 )
3763 )
3764 Util.log('Renaming child back to {}'.format(childUuid))
3765 child.rename(childUuid)
3766 Util.log('Updating the VDI record')
3767 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3768 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3770 # TODO: Maybe deflate here.
3772 if child.isHidden():
3773 child._setHidden(False)
3774 if not parent.isHidden():
3775 parent._setHidden(True)
3776 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3777 Util.log('*** leaf-coalesce undo successful')
3779 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3780 Util.log('*** FINISH LEAF-COALESCE')
3781 vdi = self.getVDI(childUuid)
3782 if not vdi:
3783 raise util.SMException('VDI {} not found'.format(childUuid))
3784 # TODO: Maybe inflate.
3785 try:
3786 self.forgetVDI(parentUuid)
3787 except XenAPI.Failure:
3788 pass
3789 self._updateSlavesOnResize(vdi)
3790 Util.log('*** finished leaf-coalesce successfully')
3792 def _checkSlaves(self, vdi):
3793 try:
3794 all_openers = self._linstor.get_volume_openers(vdi.uuid)
3795 for openers in all_openers.values():
3796 for opener in openers.values():
3797 if opener['process-name'] != 'tapdisk':
3798 raise util.SMException(
3799 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3800 )
3801 except LinstorVolumeManagerError as e:
3802 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
3803 raise
3806################################################################################
3807#
3808# Helpers
3809#
3810def daemonize():
3811 pid = os.fork()
3812 if pid:
3813 os.waitpid(pid, 0)
3814 Util.log("New PID [%d]" % pid)
3815 return False
3816 os.chdir("/")
3817 os.setsid()
3818 pid = os.fork()
3819 if pid:
3820 Util.log("Will finish as PID [%d]" % pid)
3821 os._exit(0)
3822 for fd in [0, 1, 2]:
3823 try:
3824 os.close(fd)
3825 except OSError:
3826 pass
3827 # we need to fill those special fd numbers or pread won't work
3828 sys.stdin = open("/dev/null", 'r')
3829 sys.stderr = open("/dev/null", 'w')
3830 sys.stdout = open("/dev/null", 'w')
3831 # As we're a new process we need to clear the lock objects
3832 lock.Lock.clearAll()
3833 return True
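# A minimal sketch of the intended calling pattern (mirrored by gc() below):
# the parent gets False back and simply returns, while the daemonized child
# gets True and must finish with os._exit() rather than returning into the
# caller's control flow.
#
#   if daemonize():
#       try:
#           run_background_work()  # hypothetical worker
#       finally:
#           os._exit(0)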
3836def normalizeType(type):
3837 if type in LVMSR.SUBTYPES:
3838 type = SR.TYPE_LVHD
3839 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]:
3840 # temporary while LVHD is symlinked as LVM
3841 type = SR.TYPE_LVHD
3842 if type in [
3843 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
3844 "moosefs", "xfs", "zfs", "largeblock"
3845 ]:
3846 type = SR.TYPE_FILE
3847 if type in ["linstor"]:
3848 type = SR.TYPE_LINSTOR
3849 if type not in SR.TYPES:
3850 raise util.SMException("Unsupported SR type: %s" % type)
3851 return type
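# Examples of the normalization above (using the SR.TYPE_* constants defined
# earlier in this module):
#
#   normalizeType("lvmoiscsi")  -> SR.TYPE_LVHD
#   normalizeType("nfs")        -> SR.TYPE_FILE
#   normalizeType("linstor")    -> SR.TYPE_LINSTOR
#   normalizeType("foo")        -> raises util.SMException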
3853GCPAUSE_DEFAULT_SLEEP = 5 * 60
3856def _gc_init_file(sr_uuid):
3857 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')
3860def _create_init_file(sr_uuid):
3861 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
3862 with open(_gc_init_file(sr_uuid), 'w+') as f:
3863 f.write('1')
3866def _gcLoopPause(sr, dryRun=False, immediate=False):
3867 if immediate:
3868 return
3870 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
3871 # point will just return. Otherwise, fall back on an abortable sleep.
3873 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
3875 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
3876 lambda *args: None)
3877 elif os.path.exists(_gc_init_file(sr.uuid)):
3878 def abortTest():
3879 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
3881 # If time.sleep hangs we are in deep trouble, however for
3882 # completeness we set the timeout of the abort thread to
3883 # 110% of GCPAUSE_DEFAULT_SLEEP.
3884 Util.log("GC active, about to go quiet")
3885 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
3886 None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
3887 GCPAUSE_DEFAULT_SLEEP * 1.1)
3888 Util.log("GC active, quiet period ended")
3891def _gcLoop(sr, dryRun=False, immediate=False):
3892 if not lockGCActive.acquireNoblock():
3893 Util.log("Another GC instance already active, exiting")
3894 return
3896 # Check we're still attached after acquiring locks
3897 if not sr.xapi.isPluggedHere():
3898 Util.log("SR no longer attached, exiting")
3899 return
3901 # Clean up IntelliCache files
3902 sr.cleanupCache()
3904 # Track how many we do
3905 coalesced = 0
3906 task_status = "success"
3907 try:
3908 # Check if any work needs to be done
3909 if not sr.xapi.isPluggedHere():
3910 Util.log("SR no longer attached, exiting")
3911 return
3912 sr.scanLocked()
3913 if not sr.hasWork():
3914 Util.log("No work, exiting")
3915 return
3916 sr.xapi.create_task(
3917 "Garbage Collection",
3918 "Garbage collection for SR %s" % sr.uuid)
3919 _gcLoopPause(sr, dryRun, immediate=immediate)
3920 while True:
3921 if SIGTERM:
3922 Util.log("Term requested")
3923 return
3925 if not sr.xapi.isPluggedHere():
3926 Util.log("SR no longer attached, exiting")
3927 break
3928 sr.scanLocked()
3929 if not sr.hasWork():
3930 Util.log("No work, exiting")
3931 break
3933 if not lockGCRunning.acquireNoblock():
3934 Util.log("Unable to acquire GC running lock.")
3935 return
3936 try:
3937 if not sr.gcEnabled():
3938 break
3940 sr.xapi.update_task_progress("done", coalesced)
3942 sr.cleanupCoalesceJournals()
3943 # Create the init file here in case startup is waiting on it
3944 _create_init_file(sr.uuid)
3945 sr.scanLocked()
3946 sr.updateBlockInfo()
3948 howmany = len(sr.findGarbage())
3949 if howmany > 0:
3950 Util.log("Found %d orphaned VDIs" % howmany)
3951 sr.lock()
3952 try:
3953 sr.garbageCollect(dryRun)
3954 finally:
3955 sr.unlock()
3956 sr.xapi.srUpdate()
3958 candidate = sr.findCoalesceable()
3959 if candidate:
3960 util.fistpoint.activate(
3961 "LVHDRT_finding_a_suitable_pair", sr.uuid)
3962 sr.coalesce(candidate, dryRun)
3963 sr.xapi.srUpdate()
3964 coalesced += 1
3965 continue
3967 candidate = sr.findLeafCoalesceable()
3968 if candidate:
3969 sr.coalesceLeaf(candidate, dryRun)
3970 sr.xapi.srUpdate()
3971 coalesced += 1
3972 continue
3974 finally:
3975 lockGCRunning.release()
3976 except:
3977 task_status = "failure"
3978 raise
3979 finally:
3980 sr.xapi.set_task_status(task_status)
3981 Util.log("GC process exiting, no work left")
3982 _create_init_file(sr.uuid)
3983 lockGCActive.release()
3986def _gc(session, srUuid, dryRun=False, immediate=False):
3987 init(srUuid)
3988 sr = SR.getInstance(srUuid, session)
3989 if not sr.gcEnabled(False):
3990 return
3992 try:
3993 _gcLoop(sr, dryRun, immediate=immediate)
3994 finally:
3995 sr.check_no_space_candidates()
3996 sr.cleanup()
3997 sr.logFilter.logState()
3998 del sr.xapi
4001def _abort(srUuid, soft=False):
4002 """Aborts a GC/coalesce.
4004 srUuid: the UUID of the SR whose GC/coalesce must be aborted
4005 soft: If set to True and there is a pending abort signal, the function
4006 doesn't do anything. If set to False, a new abort signal is issued.
4008 returns: If soft is set to False, we return True holding lockGCActive. If
4009 soft is set to True and an abort signal is already pending, we return False
4010 without holding lockGCActive. An exception is raised in case of error."""
4011 Util.log("=== SR %s: abort ===" % (srUuid))
4012 init(srUuid)
4013 if not lockGCActive.acquireNoblock():
4014 gotLock = False
4015 Util.log("Aborting currently-running instance (SR %s)" % srUuid)
4016 abortFlag = IPCFlag(srUuid)
4017 if not abortFlag.set(FLAG_TYPE_ABORT, soft):
4018 return False
4019 for i in range(SR.LOCK_RETRY_ATTEMPTS):
4020 gotLock = lockGCActive.acquireNoblock()
4021 if gotLock:
4022 break
4023 time.sleep(SR.LOCK_RETRY_INTERVAL)
4024 abortFlag.clear(FLAG_TYPE_ABORT)
4025 if not gotLock:
4026 raise util.CommandException(code=errno.ETIMEDOUT,
4027 reason="SR %s: error aborting existing process" % srUuid)
4028 return True
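# A minimal sketch of the two ways _abort() is used (see abort() and
# gc_force() below): a hard abort either returns True holding lockGCActive
# or raises, while a soft abort may back off if an abort is already pending.
#
#   if _abort(sr_uuid):
#       lockGCActive.release()      # we now own the lock and must release it
#
#   if not _abort(sr_uuid, soft=True):
#       pass                        # another abort request was already pending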
4031def init(srUuid):
4032 global lockGCRunning
4033 if not lockGCRunning:
4034 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
4035 global lockGCActive
4036 if not lockGCActive:
4037 lockGCActive = LockActive(srUuid)
4040class LockActive:
4041 """
4042 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
4043 if another process holds the SR lock.
4044 """
4045 def __init__(self, srUuid):
4046 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
4047 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)
4049 def acquireNoblock(self):
4050 self._srLock.acquire()
4052 try:
4053 return self._lock.acquireNoblock()
4054 finally:
4055 self._srLock.release()
4057 def release(self):
4058 self._lock.release()
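# A minimal usage sketch for LockActive (this is how init() and _gcLoop use
# it): acquireNoblock() briefly takes the SR lock first, so the GC "active"
# lock cannot be grabbed while another process is holding the SR lock.
#
#   active = LockActive(sr_uuid)
#   if active.acquireNoblock():
#       try:
#           ...                     # GC/coalesce work
#       finally:
#           active.release()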
4061def usage():
4062 output = """Garbage collect and/or coalesce COW images in a COW-based SR
4064Parameters:
4065 -u --uuid UUID SR UUID
4066 and one of:
4067 -g --gc garbage collect, coalesce, and repeat while there is work
4068 -G --gc_force garbage collect once, aborting any current operations
4069 -c --clean_cache <max_age> clean up IntelliCache cache files older than
4070 max_age hours
4071 -a --abort abort any currently running operation (GC or coalesce)
4072 -q --query query the current state (GC'ing, coalescing or not running)
4073 -x --disable disable GC/coalesce (will be in effect until you exit)
4074 -t --debug see Debug below
4076Options:
4077 -b --background run in background (return immediately) (valid for -g only)
4078 -f --force continue in the presence of COW images with errors (when doing
4079 GC, this might cause removal of any such images) (only valid
4080 for -G) (DANGEROUS)
4082Debug:
4083 The --debug parameter enables manipulation of LVHD VDIs for debugging
4084 purposes. ** NEVER USE IT ON A LIVE VM **
4085 The following parameters are required:
4086 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
4087 "deflate".
4088 -v --vdi_uuid VDI UUID
4089 """
4090 #-d --dry-run don't actually perform any SR-modifying operations
4091 print(output)
4092 Util.log("(Invalid usage)")
4093 sys.exit(1)
4096##############################################################################
4097#
4098# API
4099#
4100def abort(srUuid, soft=False):
4101 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
4102 """
4103 if _abort(srUuid, soft):
4104 Util.log("abort: releasing the process lock")
4105 lockGCActive.release()
4106 return True
4107 else:
4108 return False
4111def gc(session, srUuid, inBackground, dryRun=False):
4112 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
4113 immediately if inBackground=True.
4115 The following algorithm is used:
4116 1. If we are already GC'ing in this SR, return
4117 2. If we are already coalescing a VDI pair:
4118 a. Scan the SR and determine if the VDI pair is GC'able
4119 b. If the pair is not GC'able, return
4120 c. If the pair is GC'able, abort coalesce
4121 3. Scan the SR
4122 4. If there is nothing to collect, nor to coalesce, return
4123 5. If there is something to collect, GC all, then goto 3
4124 6. If there is something to coalesce, coalesce one pair, then goto 3
4125 """
4126 Util.log("=== SR %s: gc ===" % srUuid)
4128 signal.signal(signal.SIGTERM, receiveSignal)
4130 if inBackground:
4131 if daemonize():
4132 # we are now running in the background. Catch & log any errors
4133 # because there is no other way to propagate them back at this
4134 # point
4136 try:
4137 _gc(None, srUuid, dryRun)
4138 except AbortException:
4139 Util.log("Aborted")
4140 except Exception:
4141 Util.logException("gc")
4142 Util.log("* * * * * SR %s: ERROR\n" % srUuid)
4143 os._exit(0)
4144 else:
4145 _gc(session, srUuid, dryRun, immediate=True)
4148def start_gc(session, sr_uuid):
4149 """
4150 Try to start a background GC session by forking the current process.
4151 If using the systemd version, call start_gc_service() instead.
4152 """
4153 # don't bother if an instance already running (this is just an
4154 # optimization to reduce the overhead of forking a new process if we
4155 # don't have to, but the process will check the lock anyways)
4156 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
4157 if not lockRunning.acquireNoblock():
4158 if should_preempt(session, sr_uuid):
4159 util.SMlog("Aborting currently-running coalesce of garbage VDI")
4160 try:
4161 if not abort(sr_uuid, soft=True):
4162 util.SMlog("The GC has already been scheduled to re-start")
4163 except util.CommandException as e:
4164 if e.code != errno.ETIMEDOUT:
4165 raise
4166 util.SMlog('failed to abort the GC')
4167 else:
4168 util.SMlog("A GC instance already running, not kicking")
4169 return
4170 else:
4171 lockRunning.release()
4173 util.SMlog(f"Starting GC file is {__file__}")
4174 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
4175 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4177def start_gc_service(sr_uuid, wait=False):
4178 """
4179 This starts the templated systemd service which runs GC on the given SR UUID.
4180 If the service was already started, this is a no-op.
4182 Because the service is a one-shot with RemainAfterExit=no, when called with
4183 wait=True this will run the service synchronously and will not return until the
4184 run has finished. This is used to force a run of the GC instead of just kicking it
4185 in the background.
4186 """
4187 sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
4188 util.SMlog(f"Kicking SMGC@{sr_uuid}...")
4189 cmd = ["/usr/bin/systemctl", "--quiet"]
4190 if not wait:
4191 cmd.append("--no-block")
4192 cmd += ["start", f"SMGC@{sr_uuid_esc}"]
4193 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
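# Example of the instance-name escaping above (hypothetical UUID shown).
# Dashes are not part of the character set systemd allows verbatim in unit
# instance names ("-" is used to encode "/"), so each dash in the SR UUID is
# escaped to "\x2d", the same form systemd-escape would produce:
#
#   sr_uuid = "0e34b61a-15c6-4b1e-8b63-4b9a3a5d2e10"   # hypothetical
#   sr_uuid.replace("-", "\\x2d")
#   # -> unit instance "SMGC@0e34b61a\x2d15c6\x2d4b1e\x2d8b63\x2d4b9a3a5d2e10"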
4196def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
4197 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
4198 the SR lock is held.
4199 The following algorithm is used:
4200 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
4201 2. Scan the SR
4202 3. GC
4203 4. return
4204 """
4205 Util.log("=== SR %s: gc_force ===" % srUuid)
4206 init(srUuid)
4207 sr = SR.getInstance(srUuid, session, lockSR, True)
4208 if not lockGCActive.acquireNoblock():
4209 abort(srUuid)
4210 else:
4211 Util.log("Nothing was running, clear to proceed")
4213 if force:
4214 Util.log("FORCED: will continue even if there are COW image errors")
4215 sr.scanLocked(force)
4216 sr.cleanupCoalesceJournals()
4218 try:
4219 sr.cleanupCache()
4220 sr.garbageCollect(dryRun)
4221 finally:
4222 sr.cleanup()
4223 sr.logFilter.logState()
4224 lockGCActive.release()
4227def get_state(srUuid):
4228 """Return whether GC/coalesce is currently running or not. This asks systemd for
4229 the state of the templated SMGC service and will return True if it is "activating"
4230 or "running" (for completeness, as in practice it will never achieve the latter state)
4231 """
4232 sr_uuid_esc = srUuid.replace("-", "\\x2d")
4233 cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
4234 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4235 state = result.stdout.decode('utf-8').rstrip()
4236 if state == "activating" or state == "running":
4237 return True
4238 return False
4241def should_preempt(session, srUuid):
4242 sr = SR.getInstance(srUuid, session)
4243 entries = sr.journaler.getAll(VDI.JRN_COALESCE)
4244 if len(entries) == 0:
4245 return False
4246 elif len(entries) > 1:
4247 raise util.SMException("More than one coalesce entry: " + str(entries))
4248 sr.scanLocked()
4249 coalescedUuid = entries.popitem()[0]
4250 garbage = sr.findGarbage()
4251 for vdi in garbage:
4252 if vdi.uuid == coalescedUuid:
4253 return True
4254 return False
4257def get_coalesceable_leaves(session, srUuid, vdiUuids):
4258 coalesceable = []
4259 sr = SR.getInstance(srUuid, session)
4260 sr.scanLocked()
4261 for uuid in vdiUuids:
4262 vdi = sr.getVDI(uuid)
4263 if not vdi:
4264 raise util.SMException("VDI %s not found" % uuid)
4265 if vdi.isLeafCoalesceable():
4266 coalesceable.append(uuid)
4267 return coalesceable
4270def cache_cleanup(session, srUuid, maxAge):
4271 sr = SR.getInstance(srUuid, session)
4272 return sr.cleanupCache(maxAge)
4275def debug(sr_uuid, cmd, vdi_uuid):
4276 Util.log("Debug command: %s" % cmd)
4277 sr = SR.getInstance(sr_uuid, None)
4278 if not isinstance(sr, LVMSR):
4279 print("Error: not an LVHD SR")
4280 return
4281 sr.scanLocked()
4282 vdi = sr.getVDI(vdi_uuid)
4283 if not vdi:
4284 print("Error: VDI %s not found" % vdi_uuid)
4285 return
4286 print("Running %s on SR %s" % (cmd, sr))
4287 print("VDI before: %s" % vdi)
4288 if cmd == "activate":
4289 vdi._activate()
4290 print("VDI file: %s" % vdi.path)
4291 if cmd == "deactivate":
4292 ns = NS_PREFIX_LVM + sr.uuid
4293 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
4294 if cmd == "inflate":
4295 vdi.inflateFully()
4296 sr.cleanup()
4297 if cmd == "deflate":
4298 vdi.deflate()
4299 sr.cleanup()
4300 sr.scanLocked()
4301 print("VDI after: %s" % vdi)
4304def abort_optional_reenable(uuid):
4305 print("Disabling GC/coalesce for %s" % uuid)
4306 ret = _abort(uuid)
4307 input("Press enter to re-enable...")
4308 print("GC/coalesce re-enabled")
4309 lockGCRunning.release()
4310 if ret:
4311 lockGCActive.release()
4314##############################################################################
4315#
4316# CLI
4317#
4318def main():
4319 action = ""
4320 maxAge = 0
4321 uuid = ""
4322 background = False
4323 force = False
4324 dryRun = False
4325 debug_cmd = ""
4326 vdi_uuid = ""
4327 shortArgs = "gGc:aqxu:bfdt:v:"
4328 longArgs = ["gc", "gc_force", "clean_cache=", "abort", "query", "disable",
4329 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
4331 try:
4332 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
4333 except getopt.GetoptError:
4334 usage()
4335 for o, a in opts:
4336 if o in ("-g", "--gc"):
4337 action = "gc"
4338 if o in ("-G", "--gc_force"):
4339 action = "gc_force"
4340 if o in ("-c", "--clean_cache"):
4341 action = "clean_cache"
4342 maxAge = int(a)
4343 if o in ("-a", "--abort"):
4344 action = "abort"
4345 if o in ("-q", "--query"):
4346 action = "query"
4347 if o in ("-x", "--disable"):
4348 action = "disable"
4349 if o in ("-u", "--uuid"):
4350 uuid = a
4351 if o in ("-b", "--background"):
4352 background = True
4353 if o in ("-f", "--force"):
4354 force = True
4355 if o in ("-d", "--dry-run"):
4356 Util.log("Dry run mode")
4357 dryRun = True
4358 if o in ("-t", "--debug"):
4359 action = "debug"
4360 debug_cmd = a
4361 if o in ("-v", "--vdi_uuid"):
4362 vdi_uuid = a
4364 if not action or not uuid:
4365 usage()
4366 if action == "debug" and not (debug_cmd and vdi_uuid) or \
4367 action != "debug" and (debug_cmd or vdi_uuid):
4368 usage()
4370 if action != "query" and action != "debug":
4371 print("All output goes to log")
4373 if action == "gc":
4374 gc(None, uuid, background, dryRun)
4375 elif action == "gc_force":
4376 gc_force(None, uuid, force, dryRun, True)
4377 elif action == "clean_cache":
4378 cache_cleanup(None, uuid, maxAge)
4379 elif action == "abort":
4380 abort(uuid)
4381 elif action == "query":
4382 print("Currently running: %s" % get_state(uuid))
4383 elif action == "disable":
4384 abort_optional_reenable(uuid)
4385 elif action == "debug":
4386 debug(uuid, debug_cmd, vdi_uuid)
4389if __name__ == '__main__':
4390 main()