Coverage for drivers/cleanup.py : 32%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/python3
2#
3# Copyright (C) Citrix Systems Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; version 2.1 only.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Script to coalesce and garbage collect COW-based SR's in the background
19#
21from sm_typing import Any, Optional, List, override
23import os
24import os.path
25import sys
26import time
27import signal
28import subprocess
29import getopt
30import datetime
31import traceback
32import base64
33import zlib
34import errno
35import stat
37import XenAPI # pylint: disable=import-error
38import util
39import lvutil
40import lvmcache
41import journaler
42import fjournaler
43import lock
44import blktap2
45import xs_errors
46from refcounter import RefCounter
47from ipc import IPCFlag
48from lvmanager import LVActivator
49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
50from functools import reduce
51from time import monotonic as _time
53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
54from cowutil import CowImageInfo, CowUtil, getCowUtil
55from lvmcowutil import LV_PREFIX, LvmCowUtil
56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION
58try:
59 from linstorcowutil import LinstorCowUtil
60 from linstorjournaler import LinstorJournaler
61 from linstorvolumemanager import get_controller_uri
62 from linstorvolumemanager import LinstorVolumeManager
63 from linstorvolumemanager import LinstorVolumeManagerError
64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
66 LINSTOR_AVAILABLE = True
67except ImportError:
68 LINSTOR_AVAILABLE = False
# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# sm-config / other-config keys used for coalesce-error rate limiting
# (see VDI._reportCoalesceError)
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'

VAR_RUN = "/var/run/"
# per-SR file used to persist coalesce speed samples
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
# set to True by receiveSignal() when SIGTERM arrives; long-running loops
# (e.g. Util.runAbortable) check it so they can abort cleanly
SIGTERM = False
class AbortException(util.SMException):
    """Raised when a GC/coalesce operation is aborted by a signal or flag."""
    pass
def receiveSignal(signalNumber, frame):
    """Signal handler: record that SIGTERM was received.

    Long-running GC loops (see Util.runAbortable) poll the module-level
    SIGTERM flag so they can abort cleanly instead of being killed mid-way.

    :param signalNumber: signal number delivered by the kernel (unused here)
    :param frame: current stack frame (unused)
    """
    global SIGTERM

    # fix: log message previously misspelled "recieved"
    util.SMlog("GC: received SIGTERM")
    SIGTERM = True
118################################################################################
119#
120# Util
121#
class Util:
    """Assorted helpers: SMGC logging, subprocess execution, running a
    function in an abortable child process, and bitmap/size arithmetic."""

    # selectors for the 'ret' argument of doexec()
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    # binary multipliers used by num2str()
    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log `text` under the SMGC identifier."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the current exception (with full traceback) under `tag`.

        SystemExit is propagated as a clean exit instead of being logged.
        """
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        """Execute a subprocess, then return its return code, stdout, stderr.

        :param args: the command; note it is run through a shell (shell=True)
        :param expectedRC: a return code (or list of codes) counted as success
        :param inputtext: optional data to feed to the child's stdin
        :param ret: RET_RC/RET_STDOUT/RET_STDERR selects the value returned
                    (default: stdout)
        :param log: whether to log the command and its return code
        :raises util.CommandException: if the RC is not in expectedRC
        """
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """execute func in a separate thread and kill it if abortTest signals
        so"""
        # (despite the historical docstring, this forks a child *process*:
        # the parent polls IPC flags and SIGKILLs the child's process group
        # on abort or timeout)
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for child result, abort condition, or timeout
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log(" Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # always reap the child so we don't leave a zombie behind
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: become a process-group leader (so the parent can kill the
            # whole group) and report the outcome via IPC flags
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occured")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Format a byte count using binary G/M/K suffixes (e.g. '1.000K')."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in the non-negative integer `val`."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        lenShort = min(len(bitmap1), len(bitmap2))
        bitmapLong = bitmap1
        if len(bitmap2) > len(bitmap1):
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            count += Util.numBits(bitmap1[i] | bitmap2[i])

        # tail of the longer bitmap. Starting at lenShort (instead of reusing
        # the leftover loop variable, as the previous code did) also fixes a
        # NameError when the shorter bitmap is empty.
        for i in range(lenShort, len(bitmapLong)):
            count += Util.numBits(bitmapLong[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the absolute path of this script (.py, never .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript
278################################################################################
279#
280# XAPI
281#
class XAPI:
    """Wrapper around a XenAPI session, bound to one SR and this host."""

    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"  # host plugin used for remote operations

    # kinds of per-VDI config containers; see CONFIG_NAME and the
    # *ConfigVDI methods below
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        """Raised when a XAPI object lookup fails."""
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XAPI session (logged in as root)."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Bind to SR `srUuid`; if `session` is None, open a private session
        (logged out again in __del__)."""
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        # progress counters pushed to the task by update_task()
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we opened ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        """XAPI ref of the SR this object is bound to."""
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has a currently-attached PBD on this host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if this host is the one responsible for this SR: the pool
        master for shared SRs, the (single) attached host for local ones.

        :raises util.SMException: if a local SR is not attached exactly once
        """
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        """Return the refs of all online hosts in the pool."""
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Invoke the on-slave 'multi' plugin function on `hostRef`."""
        text = self.session.xenapi.host.call_plugin( \
                hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        """Return the host record for `hostRef`."""
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        # raw uuid -> ref lookup; raises XenAPI.Failure if unknown
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        """Return the XAPI ref of the given VDI object."""
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for `uuid`, or None if it does not exist."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an 'internal' snapshot of `vdi`; return the new VDI ref."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the config container holding `key` for `vdi`: a map for
        sm-config/other-config, a scalar for on-boot/allow-caching."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove `key` from the appropriate map-typed config of `vdi`.
        Only sm-config and other-config keys can be removed."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add `key` = `val` to the appropriate map-typed config of `vdi`.
        Only sm-config and other-config keys can be added."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        """True if XAPI flags `vdi` as a snapshot."""
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Mark every SR that has local caching enabled as dirty."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
                'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Start an async SR.update and wait up to ~60s for it to finish,
        cancelling the task on abort request or timeout."""
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        # best-effort cancel: failure here is not fatal
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Refresh the GC task's 'applies_to' field and progress fraction."""
        self.session.xenapi.task.set_other_config(
                self.task,
                {
                    "applies_to": self._srRef
                })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                    self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create a XAPI task used to report GC progress."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Update a progress counter ('coalescable' or 'done') and push it."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        """Set the status of the GC task, if one was created."""
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)
498################################################################################
499#
500# VDI
501#
class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1  # seconds between polls in abortable operations
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # which XAPI config container each key lives in (see XAPI.getConfigVDI)
    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4
556 def __init__(self, sr, uuid, vdi_type):
557 self.sr = sr
558 self.scanError = True
559 self.uuid = uuid
560 self.vdi_type = vdi_type
561 self.fileName = ""
562 self.parentUuid = ""
563 self.sizeVirt = -1
564 self._sizePhys = -1
565 self._sizeAllocated = -1
566 self.hidden = False
567 self.parent = None
568 self.children = []
569 self._vdiRef = None
570 self.cowutil = getCowUtil(vdi_type)
571 self._clearRef()
    @staticmethod
    def extractUuid(path):
        """Derive the VDI uuid from its file path (SR-type specific)."""
        raise NotImplementedError("Implement in sub class")
    def load(self, info=None) -> None:
        """Load VDI info"""
        # no-op in the base class; concrete SR types override this
        pass
    def getDriverName(self) -> str:
        """Return the driver name for tapdisk operations (the vdi type)."""
        return self.vdi_type
    def getRef(self):
        """Return the XAPI ref for this VDI, looking it up once and caching."""
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef
589 def getConfig(self, key, default=None):
590 config = self.sr.xapi.getConfigVDI(self, key)
591 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 591 ↛ 592line 591 didn't jump to line 592, because the condition on line 591 was never true
592 val = config
593 else:
594 val = config.get(key)
595 if val:
596 return val
597 return default
    def setConfig(self, key, val):
        """Set `key` = `val` in this VDI's config (replacing any old value)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))
    def delConfig(self, key):
        """Remove `key` from this VDI's config."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))
    def ensureUnpaused(self):
        """Unpause this VDI if its config marks it as currently paused."""
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()
    def pause(self, failfast=False) -> None:
        """Pause the tapdisk serving this VDI.

        :raises util.SMException: if the tapdisk could not be paused
        """
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)
    def _report_tapdisk_unpause_error(self):
        """Best-effort XenCenter message reporting a failed tapdisk unpause."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            # reporting must never mask the original unpause failure
            util.SMlog("failed to generate message: %s" % e)
    def unpause(self):
        """Unpause the tapdisk; report to XenCenter and raise on failure."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)
    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                        self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # the VDI may have legitimately disappeared underneath us
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()
    def isSnapshot(self):
        """True if XAPI considers this VDI a snapshot."""
        return self.sr.xapi.isSnapshot(self)
    def isAttachedRW(self):
        """True if this VDI is currently attached read-write somewhere."""
        return util.is_attached_rw(
                self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
    def getVDIBlocks(self):
        """Return the raw block bitmap (refreshes the copy stored in the
        VDI config, then base64-decodes and decompresses it)."""
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap
665 def isCoalesceable(self):
666 """A VDI is coalesceable if it has no siblings and is not a leaf"""
667 return not self.scanError and \
668 self.parent and \
669 len(self.parent.children) == 1 and \
670 self.hidden and \
671 len(self.children) > 0
673 def isLeafCoalesceable(self):
674 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
675 return not self.scanError and \
676 self.parent and \
677 len(self.parent.children) == 1 and \
678 not self.hidden and \
679 len(self.children) == 0
681 def canLiveCoalesce(self, speed):
682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
683 isLeafCoalesceable() already"""
684 feasibleSize = False
685 allowedDownTime = \
686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
687 allocated_size = self.getAllocatedSize()
688 if speed:
689 feasibleSize = \
690 allocated_size // speed < allowedDownTime
691 else:
692 feasibleSize = \
693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
695 return (feasibleSize or
696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
698 def getAllPrunable(self):
699 if len(self.children) == 0: # base case
700 # it is possible to have a hidden leaf that was recently coalesced
701 # onto its parent, its children already relinked but not yet
702 # reloaded - in which case it may not be garbage collected yet:
703 # some tapdisks could still be using the file.
704 if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
705 return []
706 if not self.scanError and self.hidden:
707 return [self]
708 return []
710 thisPrunable = True
711 vdiList = []
712 for child in self.children:
713 childList = child.getAllPrunable()
714 vdiList.extend(childList)
715 if child not in childList:
716 thisPrunable = False
718 # We can destroy the current VDI if all childs are hidden BUT the
719 # current VDI must be hidden too to do that!
720 # Example in this case (after a failed live leaf coalesce):
721 #
722 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
723 # SMGC: [32436] b5458d61(1.000G/4.127M)
724 # SMGC: [32436] *OLD_b545(1.000G/4.129M)
725 #
726 # OLD_b545 is hidden and must be removed, but b5458d61 not.
727 # Normally we are not in this function when the delete action is
728 # executed but in `_liveLeafCoalesce`.
730 if not self.scanError and not self.hidden and thisPrunable:
731 vdiList.append(self)
732 return vdiList
    def getSizePhys(self) -> int:
        """Physical (on-disk) size in bytes; -1 when not yet known."""
        return self._sizePhys
    def getAllocatedSize(self) -> int:
        """Allocated size in bytes; -1 when not yet known."""
        return self._sizeAllocated
740 def getTreeRoot(self):
741 "Get the root of the tree that self belongs to"
742 root = self
743 while root.parent:
744 root = root.parent
745 return root
747 def getTreeHeight(self):
748 "Get the height of the subtree rooted at self"
749 if len(self.children) == 0:
750 return 1
752 maxChildHeight = 0
753 for child in self.children:
754 childHeight = child.getTreeHeight()
755 if childHeight > maxChildHeight:
756 maxChildHeight = childHeight
758 return maxChildHeight + 1
760 def getAllLeaves(self) -> List["VDI"]:
761 "Get all leaf nodes in the subtree rooted at self"
762 if len(self.children) == 0:
763 return [self]
765 leaves = []
766 for child in self.children:
767 leaves.extend(child.getAllLeaves())
768 return leaves
    def updateBlockInfo(self) -> Optional[str]:
        """Query the image's block bitmap and store it (base64-encoded) in
        the VDI config; return the encoded value."""
        val = base64.b64encode(self._queryCowBlocks()).decode()
        self.setConfig(VDI.DB_VDI_BLOCKS, val)
        return val
    def rename(self, uuid) -> None:
        "Rename the VDI file"
        # the target uuid must not already exist in this SR
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self
    def delete(self) -> None:
        "Physically delete the VDI"
        # drop all per-VDI locks before clearing the record
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()
    def getParent(self) -> str:
        """Return the parent image reference reported by the COW util
        (whitespace-stripped)."""
        return self.cowutil.getParent(self.path, lambda x: x.strip())
    def repair(self, parent) -> None:
        """Attempt to repair the given parent image via the COW util."""
        self.cowutil.repair(parent)
798 @override
799 def __str__(self) -> str:
800 strHidden = ""
801 if self.hidden: 801 ↛ 802line 801 didn't jump to line 802, because the condition on line 801 was never true
802 strHidden = "*"
803 strSizeVirt = "?"
804 if self.sizeVirt > 0: 804 ↛ 805line 804 didn't jump to line 805, because the condition on line 804 was never true
805 strSizeVirt = Util.num2str(self.sizeVirt)
806 strSizePhys = "?"
807 if self._sizePhys > 0: 807 ↛ 808line 807 didn't jump to line 808, because the condition on line 807 was never true
808 strSizePhys = "/%s" % Util.num2str(self._sizePhys)
809 strSizeAllocated = "?"
810 if self._sizeAllocated >= 0:
811 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
812 strType = "[{}]".format(self.vdi_type)
814 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
815 strSizePhys, strSizeAllocated, strType)
    def validate(self, fast=False) -> None:
        """Check image integrity.

        :param fast: perform only a quick check
        :raises util.SMException: if the COW image is corrupted
        """
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)
    def _clear(self):
        # reset identifying state after the backing file has been deleted
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()
    def _clearRef(self):
        # invalidate the cached XAPI ref; getRef() will refetch it lazily
        self._vdiRef = None
    def _call_plug_cancel(self, hostRef):
        """Ask the remote host to cancel an in-progress tapdisk commit."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        self.sr.xapi.session.xenapi.host.call_plugin( \
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
    def _call_plugin_coalesce(self, hostRef):
        """Run the tapdisk commit (coalesce) remotely on `hostRef`."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
        ret = self.sr.xapi.session.xenapi.host.call_plugin( \
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
        util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
    def _doCoalesceOnHost(self, hostRef):
        """Coalesce this VDI onto its parent by driving tapdisk on the remote
        host that has the image open."""
        self.validate()
        self.parent.validate(True)
        # make sure the parent can hold this VDI's virtual size
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        #TODO: We might need to make the LV RW on the slave directly for coalesce?
        # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them

        def abortTest():
            # the gc_running file acts as a kill-switch: an empty file means
            # we must cancel the remote commit and abort
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        #TODO: Need to call commit cancel on the hostRef if we stop
                        util.SMlog("DAMS: Cancelling")
                        self._call_plug_cancel(hostRef)
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    util.SMlog("File {} does not exist".format(file))
                else:
                    util.SMlog("IOError: {}".format(e))
                # NOTE(review): indentation reconstructed from a garbled dump -
                # confirm whether a missing file (ENOENT) should abort too
                return True
            return False

        #TODO: Add exception handling here like when calling _doCoalesceCowImage in a runAbortable situation
        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
                          None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)

        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()
    def _isOpenOnHosts(self) -> Optional[str]:
        """Return the ref of a host that currently has this image open
        (queried via the on-slave plugin), or None if nobody does."""
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
                    hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None
    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # make sure the parent can hold this VDI's virtual size
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()
    def _verifyContents(self, timeOut):
        """Verify a coalesce by diffing this image against its parent with
        tapdisk-diff, abortable and bounded by `timeOut`."""
        Util.log(" Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log(" Coalesce verification succeeded")
    def _runTapdiskDiff(self):
        """Run tapdisk-diff between this image and its parent; True on
        success (doexec raises on a non-zero exit)."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True
    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        # The rate comes from the SR's other-config (messages per minute);
        # 0 disables throttling, a negative value disables reporting.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                # remember when we last emitted a message, for throttling
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
    def coalesce(self) -> int:
        """Coalesce this image into its parent; return the amount of data
        coalesced (used for speed accounting)."""
        return self.cowutil.coalesce(self.path)
    @staticmethod
    def _doCoalesceCowImage(vdi: "VDI"):
        """Perform the actual coalesce of `vdi` and record the achieved
        speed. Runs inside the runAbortable() child process (see
        _coalesceCowImage)."""
        try:
            startTime = time.time()
            # allocated_size is currently unused but kept for reference
            allocated_size = vdi.getAllocatedSize()
            coalesced_size = vdi.coalesce()
            endTime = time.time()
            vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
        except util.CommandException as ce:
            # We use try/except for the following piece of code because it runs
            # in a separate process context and errors will not be caught and
            # reported by anyone.
            try:
                # Report coalesce errors back to user via XC
                VDI._reportCoalesceError(vdi, ce)
            except Exception as e:
                util.SMlog('failed to create XenCenter message: %s' % e)
            raise ce
        except:
            raise
    def _vdi_is_raw(self, vdi_path) -> bool:
        """
        Given path to vdi determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].vdi_type == VdiType.RAW
    def _coalesceCowImage(self, timeOut):
        """Run the COW coalesce of this VDI into its parent, abortable via the
        IPC abort flag and bounded by 'timeOut' (passed to runAbortable).
        On any failure, attempt a repair of the (non-raw) parent and re-raise."""
        Util.log(" Running COW coalesce on %s" % self)

        def abortTest():
            # When the coalesce can run on a remote host, also abort if the
            # SR's gc_running file is empty or missing (not just on the IPC
            # abort flag).
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            # Test hook: optionally inject a failure at this point.
            util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # Exception at this phase could indicate a failure in COW coalesce
            # or a kill of COW coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                        'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                    'after failed coalesce on %s, err: %s' %
                    (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
1042 def _relinkSkip(self) -> None:
1043 """Relink children of this VDI to point to the parent of this VDI"""
1044 abortFlag = IPCFlag(self.sr.uuid)
1045 for child in self.children:
1046 if abortFlag.test(FLAG_TYPE_ABORT): 1046 ↛ 1047line 1046 didn't jump to line 1047, because the condition on line 1046 was never true
1047 raise AbortException("Aborting due to signal")
1048 Util.log(" Relinking %s from %s to %s" % \
1049 (child, self, self.parent))
1050 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
1051 child._setParent(self.parent)
1052 self.children = []
1054 def _reloadChildren(self, vdiSkip):
1055 """Pause & unpause all VDIs in the subtree to cause blktap to reload
1056 the COW image metadata for this file in any online VDI"""
1057 abortFlag = IPCFlag(self.sr.uuid)
1058 for child in self.children:
1059 if child == vdiSkip:
1060 continue
1061 if abortFlag.test(FLAG_TYPE_ABORT): 1061 ↛ 1062line 1061 didn't jump to line 1062, because the condition on line 1061 was never true
1062 raise AbortException("Aborting due to signal")
1063 Util.log(" Reloading VDI %s" % child)
1064 child._reload()
1066 def _reload(self):
1067 """Pause & unpause to cause blktap to reload the image metadata"""
1068 for child in self.children: 1068 ↛ 1069line 1068 didn't jump to line 1069, because the loop on line 1068 never started
1069 child._reload()
1071 # only leaves can be attached
1072 if len(self.children) == 0: 1072 ↛ exitline 1072 didn't return from function '_reload', because the condition on line 1072 was never false
1073 try:
1074 self.delConfig(VDI.DB_VDI_RELINKING)
1075 except XenAPI.Failure as e:
1076 if not util.isInvalidVDI(e):
1077 raise
1078 self.refresh()
    def _tagChildrenForRelink(self):
        """Tag every leaf of the subtree with DB_VDI_RELINKING so that the VDI
        is not activated while its parent chain is being relinked.

        Retries for up to 15 attempts, 2 seconds apart, while the VDI is
        concurrently activating; raises SMException if tagging never
        succeeds. Invalid-VDI XenAPI failures are ignored (the VDI is gone)."""
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                            self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        # Re-check: activation may have started between the
                        # check above and setting the relinking tag; if so,
                        # undo the tag and retry.
                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()
1108 def _loadInfoParent(self):
1109 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
1110 if ret:
1111 self.parentUuid = ret
    def _setParent(self, parent) -> None:
        """Make 'parent' the new parent of this VDI: update the image header,
        the in-memory tree, and (best-effort) the VDI's config field."""
        self.cowutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                (self.uuid, self.parentUuid))
        except:
            # Best-effort: failure to persist the parent field is only logged.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                (self.uuid, self.parentUuid))
1126 def _loadInfoHidden(self) -> None:
1127 hidden = self.cowutil.getHidden(self.path)
1128 self.hidden = (hidden != 0)
    def _setHidden(self, hidden=True) -> None:
        """Persist the hidden flag to the image header, then cache it on self."""
        self.cowutil.setHidden(self.path, hidden)
        self.hidden = hidden
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
            (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            # Within the fast-resize limit: no pause/journal needed.
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                # Pause the whole subtree (under the SR lock) around the
                # journaled resize, and always unpause/unlock again.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the size actually recorded in the image.
        self.sizeVirt = self.cowutil.getSizeVirt(self.path)
1166 def _setSizeVirt(self, size) -> None:
1167 """WARNING: do not call this method directly unless all VDIs in the
1168 subtree are guaranteed to be unplugged (and remain so for the duration
1169 of the operation): this operation is only safe for offline COW images"""
1170 jFile = os.path.join(self.sr.path, self.uuid)
1171 self.cowutil.setSizeVirt(self.path, size, jFile)
    def _queryCowBlocks(self) -> bytes:
        """Return the raw block allocation bitmap of this VDI's image."""
        return self.cowutil.getBlockBitmap(self.path)
    def _getCoalescedSizeData(self):
        """Get the data size of the resulting image if we coalesce self onto
        parent. We calculate the actual size by using the image block allocation
        information (as opposed to just adding up the two image sizes to get an
        upper bound)"""
        # make sure we don't use stale BAT info from vdi_rec since the child
        # was writable all this time
        self.delConfig(VDI.DB_VDI_BLOCKS)
        blocksChild = self.getVDIBlocks()
        blocksParent = self.parent.getVDIBlocks()
        # Combined block count across both bitmaps (presumably the union of
        # the set bits — see Util.countBits).
        numBlocks = Util.countBits(blocksChild, blocksParent)
        Util.log("Num combined blocks = %d" % numBlocks)
        sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
        assert(sizeData <= self.sizeVirt)
        return sizeData
1192 def _calcExtraSpaceForCoalescing(self) -> int:
1193 sizeData = self._getCoalescedSizeData()
1194 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
1195 self.cowutil.calcOverheadEmpty(self.sizeVirt)
1196 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1197 return sizeCoalesced - self.parent.getSizePhys()
1199 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1200 """How much extra space in the SR will be required to
1201 [live-]leaf-coalesce this VDI"""
1202 # the space requirements are the same as for inline coalesce
1203 return self._calcExtraSpaceForCoalescing()
1205 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1206 """How much extra space in the SR will be required to
1207 snapshot-coalesce this VDI"""
1208 return self._calcExtraSpaceForCoalescing() + \
1209 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
1211 def _getAllSubtree(self):
1212 """Get self and all VDIs in the subtree of self as a flat list"""
1213 vdiList = [self]
1214 for child in self.children:
1215 vdiList.extend(child._getAllSubtree())
1216 return vdiList
class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        # The UUID is simply the file name without its extension.
        fileName = os.path.basename(path)
        return os.path.splitext(fileName)[0]

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        # e.g. "<uuid>.vhd" — extension depends on the VDI type.
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        """Initialize the VDI fields from 'info', reading the image metadata
        from disk when no info is supplied (in which case a metadata read
        failure leaves the VDI untouched, i.e. scanError stays set)."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self.hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" % \
            (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI and move its backing file accordingly."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        # The destination must not already exist.
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI (leaf VDIs only)."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        # Lazily computed and cached; -1 means "not fetched yet".
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated
class LVMVDI(VDI):
    """Object representing a VDI in an LVM SR"""

    JRN_ZERO = "zero" # journal entry type for zeroing out end of parent

    @override
    def load(self, info=None) -> None:
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVMVDI.load"
        self.parent = None
        self.children = []
        self._sizePhys = -1       # lazily fetched, see getSizePhys()
        self._sizeAllocated = -1  # lazily fetched, see getAllocatedSize()
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self.hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)
        self.lvmcowutil = LvmCowUtil(self.cowutil)

    @override
    @staticmethod
    def extractUuid(path):
        # Delegate to the shared LVM helper.
        return LvmCowUtil.extractUuid(path)

    def inflate(self, size):
        """inflate the LV containing the COW image to 'size'"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # Refresh the LV size and invalidate cached physical/allocated sizes.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the image to minimum"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
        finally:
            self.sr.unlock()
        # Refresh the LV size and invalidate cached physical/allocated sizes.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the LV to the full size the virtual size may require."""
        self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        # Only COW images carry block info; raw LVs have none.
        if VdiType.isCowImage(self.vdi_type):
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI, its LV, its activator entry and its refcounter."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        # The destination LV must not already exist.
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # Move the refcount over to the new UUID.
        ns = NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV, forget the VDI and reset its refcount (leaves only)."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizePhys(self) -> int:
        # Lazily fetched; -1 means "not loaded yet".
        if self._sizePhys == -1:
            self._loadInfoSizePhys()
        return self._sizePhys

    def _loadInfoSizePhys(self):
        """Get the physical utilization of the COW image file. We do it individually
        (and not using the COW batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizePhys = self.cowutil.getSizePhys(self.path)
        if self._sizePhys <= 0:
            raise util.SMException("phys size of %s = %d" % \
                (self, self._sizePhys))

    @override
    def getAllocatedSize(self) -> int:
        # Lazily fetched; -1 means "not loaded yet".
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the COW volume.
        """
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        # Raw LVs keep the hidden flag in LVM metadata; COW images keep it
        # in the image header (handled by the base class).
        if not VdiType.isCowImage(self.vdi_type):
            self.hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        # Same split as _loadInfoHidden: LVM metadata for raw, header for COW.
        if not VdiType.isCowImage(self.vdi_type):
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self.hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        # Compact one-line summary:
        # "<*><uuid8>[TYPE](virt/phys/allocated/LV|activity)".
        strType = self.vdi_type
        if self.vdi_type == VdiType.RAW:
            strType = "RAW"
        strHidden = ""
        if self.hidden:
            strHidden = "*"
        strSizePhys = ""
        if self._sizePhys > 0:
            strSizePhys = Util.num2str(self._sizePhys)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
            Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
            Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        # Raw LVs carry no COW metadata to validate.
        if VdiType.isCowImage(self.vdi_type):
            VDI.validate(self, fast)

    def _setChainRw(self) -> List[str]:
        """
        Set the readonly LV and children writable.
        It's needed because the coalesce can be done by tapdisk directly
        and it will need to write parent information for children.
        The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part.
        Return a list of the LV that were previously readonly to be made RO again after the coalesce.
        """
        was_ro = []
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)
            was_ro.append(self.fileName)

        for child in self.children:
            if child.lvReadonly:
                self.sr.lvmCache.setReadonly(child.fileName, False)
                was_ro.append(child.fileName)

        return was_ro

    def _setChainRo(self, was_ro: List[str]) -> None:
        """Set the list of LV in parameters to readonly"""
        for lvName in was_ro:
            self.sr.lvmCache.setReadonly(lvName, True)

    @override
    def _doCoalesce(self) -> None:
        """LVMVDI parents must first be activated, inflated, and made writable"""
        was_ro = []
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            was_ro = self._setChainRw()
            VDI._doCoalesce(self)
        finally:
            # Always restore: refresh parent's phys size, shrink it back,
            # and re-apply the readonly flags that were temporarily lifted.
            self.parent._loadInfoSizePhys()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)
            self._setChainRo(was_ro)

    @override
    def _setParent(self, parent) -> None:
        """Update the image header to point at 'parent' (temporarily making
        the LV writable if needed), then update the in-memory tree and the
        VDI's config field (best-effort)."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
            self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the VDI-parent field for child %s with %s" % \
                (self.uuid, self.parentUuid))
        except:
            # Best-effort: failure to persist the parent field is only logged.
            Util.log("Failed to update the VDI-parent with %s for child %s" % \
                (self.parentUuid, self.uuid))

    def _activate(self):
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        # Activate self and every ancestor up to the root.
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if VdiType.isCowImage(self.vdi_type):
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        # A pending "zero" journal entry means a previous zeroing run was
        # interrupted — restart from the recorded offset.
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
            VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images."""
        self._activate()
        jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
        try:
            self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
        finally:
            self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)

    @override
    def _queryCowBlocks(self) -> bytes:
        # Ensure the LV is active before reading the bitmap.
        self._activate()
        return VDI._queryCowBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0 # raw parents are never deflated in the first place
        sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        # Inline-coalesce requirement plus the snapshot leaf's LV size.
        return self._calcExtraSpaceForCoalescing() + \
            lvutil.calcSizeLV(self.getSizePhys())
class LinstorVDI(VDI):
    """Object representing a VDI in a LINSTOR SR"""

    # Seconds to wait for a LINSTOR volume lock before giving up.
    VOLUME_LOCK_TIMEOUT = 30

    @override
    def load(self, info=None) -> None:
        """Initialize the VDI fields from 'info', reading the metadata from
        the volume itself when no info is supplied. scanError stays True on
        failure."""
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)
        # Bug fix: the previous code dereferenced `info.parentUuid` and
        # `info.vdiType` BEFORE the `if not info` fallback below, so calling
        # load(None) always crashed and the fallback was unreachable. Fall
        # back to the VDI's own type when no info is given.
        self.linstorcowutil = LinstorCowUtil(
            self.sr.xapi.session, self.sr._linstor,
            info.vdiType if info else self.vdi_type)

        if not info:
            try:
                info = self.linstorcowutil.get_info(self.uuid)
            except util.SMException:
                Util.log(
                    ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = -1       # lazily fetched, see getSizePhys()
        self._sizeAllocated = -1  # lazily fetched, see getAllocatedSize()
        self.drbd_size = -1       # lazily fetched, see getDrbdSize()
        self.hidden = info.hidden
        self.scanError = False

    @override
    def getSizePhys(self, fetch=False) -> int:
        """Physical size of the volume; cached unless 'fetch' is True."""
        if self._sizePhys < 0 or fetch:
            self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid)
        return self._sizePhys

    def getDrbdSize(self, fetch=False):
        """Size of the underlying DRBD device; cached unless 'fetch' is True."""
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid)
        return self.drbd_size

    @override
    def getAllocatedSize(self) -> int:
        # Lazily fetched for COW images; raw volumes keep -1.
        if self._sizeAllocated == -1:
            if VdiType.isCowImage(self.vdi_type):
                self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

    def inflate(self, size):
        """Inflate the DRBD volume backing a COW image to 'size'."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # Invalidate cached sizes: they changed with the volume size.
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """Deflate the DRBD volume backing a COW image to its minimum size."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizePhys = self.getSizePhys(fetch=True)
            self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        # Invalidate cached sizes: they changed with the volume size.
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the volume to the full size the virtual size may require."""
        if VdiType.isCowImage(self.vdi_type):
            self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI by updating the LINSTOR volume UUID mapping."""
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI (leaves only)."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

    @override
    def validate(self, fast=False) -> None:
        """Raise SMException if the COW image fails the integrity check."""
        if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException('COW image {} corrupted'.format(self))

    @override
    def pause(self, failfast=False) -> None:
        # Wait until no other host holds the volume lock before pausing.
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

    @override
    def coalesce(self) -> int:
        # Note: We raise `SMException` here to skip the current coalesce in case of failure.
        # Using another exception we can't execute the next coalesce calls.
        return self.linstorcowutil.force_coalesce(self.path)

    @override
    def getParent(self) -> str:
        return self.linstorcowutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

    @override
    def repair(self, parent_uuid) -> None:
        self.linstorcowutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

    @override
    def _relinkSkip(self) -> None:
        """Relink children to this VDI's parent, pausing/unpausing each
        child's tapdisk around the header update."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

    @override
    def _setParent(self, parent) -> None:
        """Point this VDI's image at 'parent', update the in-memory tree and
        (best-effort) the VDI's config field."""
        # Resolve the device path first (raises if the volume is unusable).
        self.sr._linstor.get_device_path(self.uuid)
        self.linstorcowutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                (self.uuid, self.parentUuid))
        except:
            # Best-effort: failure to persist the parent field is only logged.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                (self.uuid, self.parentUuid))

    @override
    def _doCoalesce(self) -> None:
        """Validate and inflate the parent before coalescing; always deflate
        the parent afterwards."""
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent.deflate()

    def _activateChain(self):
        """Ensure the device path of every VDI up the parent chain resolves."""
        vdi = self
        while vdi:
            try:
                # Called for its side effect only: the unused `p` binding of
                # the previous version has been dropped.
                self.sr._linstor.get_device_path(vdi.uuid)
            except Exception as e:
                # Use SMException to skip coalesce.
                # Otherwise the GC is stopped...
                raise util.SMException(str(e))
            vdi = vdi.parent

    @override
    def _setHidden(self, hidden=True) -> None:
        HIDDEN_TAG = 'hidden'

        # Raw volumes keep the flag in LINSTOR volume metadata; COW images
        # keep it in the image header (handled by the base class).
        if not VdiType.isCowImage(self.vdi_type):
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self.hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        """Ensure the virtual size of 'self' is at least 'size'; see the base
        class for the meaning of 'atomic'."""
        if self.vdi_type == VdiType.RAW:
            # Raw volume: grow the DRBD device and zero out the new tail,
            # journaling the zeroing so it can be resumed after a crash.
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
            (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size() is expressed in MiB.
        msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            self.linstorcowutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                # Pause the whole subtree (under the SR lock) around the
                # journaled resize.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """Journaled virtual resize: create a temporary (non-persistent)
        journal volume, inflate and resize, then destroy the journal."""
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
        )
        try:
            self.inflate(self.linstorcowutil.compute_volume_size(size))
            self.linstorcowutil.set_size_virt(self.path, size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

    @override
    def _queryCowBlocks(self) -> bytes:
        return self.linstorcowutil.get_block_bitmap(self.uuid)

    def _inflateParentForCoalesce(self):
        """Inflate the parent only as much as the coalesce requires."""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            self.parent.inflate(self.parent.getDrbdSize() + inc)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0
        size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """Extra SR space required to [live-]leaf-coalesce this VDI; the
        leaf can be deflated first, reducing the requirement."""
        assert self.getDrbdSize() > 0
        assert self.getSizePhys() > 0
        deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
        assert deflate_diff >= 0
        return self._calcExtraSpaceForCoalescing() - deflate_diff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """Extra SR space required to snapshot-coalesce this VDI."""
        assert self.getSizePhys() > 0
        return self._calcExtraSpaceForCoalescing() + \
            LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1940################################################################################
1941#
1942# SR
1943#
1944class SR(object):
1945 class LogFilter:
1946 def __init__(self, sr):
1947 self.sr = sr
1948 self.stateLogged = False
1949 self.prevState = {}
1950 self.currState = {}
1952 def logState(self):
1953 changes = ""
1954 self.currState.clear()
1955 for vdi in self.sr.vdiTrees:
1956 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1957 if not self.prevState.get(vdi.uuid) or \
1958 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1959 changes += self.currState[vdi.uuid]
1961 for uuid in self.prevState:
1962 if not self.currState.get(uuid):
1963 changes += "Tree %s gone\n" % uuid
1965 result = "SR %s (%d VDIs in %d COW trees): " % \
1966 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1968 if len(changes) > 0:
1969 if self.stateLogged:
1970 result += "showing only COW trees that changed:"
1971 result += "\n%s" % changes
1972 else:
1973 result += "no changes"
1975 for line in result.split("\n"):
1976 Util.log("%s" % line)
1977 self.prevState.clear()
1978 for key, val in self.currState.items():
1979 self.prevState[key] = val
1980 self.stateLogged = True
1982 def logNewVDI(self, uuid):
1983 if self.stateLogged:
1984 Util.log("Found new VDI when scanning: %s" % uuid)
1986 def _getTreeStr(self, vdi, indent=8):
1987 treeStr = "%s%s\n" % (" " * indent, vdi)
1988 for child in vdi.children:
1989 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
1990 return treeStr
    # Recognized SR backend types.
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock acquisition: seconds between attempts and attempt counts.
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"  # prefix given to the obsolete leaf during leaf-coalesce

    # other_config keys used to coordinate offline leaf-coalesce with SM.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
2009 @staticmethod
2010 def getInstance(uuid, xapiSession, createLock=True, force=False):
2011 xapi = XAPI(xapiSession, uuid)
2012 type = normalizeType(xapi.srRecord["type"])
2013 if type == SR.TYPE_FILE:
2014 return FileSR(uuid, xapi, createLock, force)
2015 elif type == SR.TYPE_LVHD:
2016 return LVMSR(uuid, xapi, createLock, force)
2017 elif type == SR.TYPE_LINSTOR:
2018 return LinstorSR(uuid, xapi, createLock, force)
2019 raise util.SMException("SR type %s not recognized" % type)
    def __init__(self, uuid, xapi, createLock, force):
        """Initialise SR state and verify the SR is usable from this host.

        Raises util.SMException if the SR is not attached here (after a
        grace period) or if this host is not the pool master — unless
        'force' is set, which skips both checks.
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}
        self.vdiTrees = []
        self.journaler = None
        self.xapi = xapi
        # _locked counts nested lock() calls; the real lock is taken at 0->1.
        self._locked = 0
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # The PBD may still be plugging: give it a chance first.
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        # VDIs that could not be coalesced due to lack of free space.
        self.no_space_candidates = {}
2053 def msg_cleared(self, xapi_session, msg_ref):
2054 try:
2055 msg = xapi_session.xenapi.message.get_record(msg_ref)
2056 except XenAPI.Failure:
2057 return True
2059 return msg is None
    def check_no_space_candidates(self):
        """Raise or clear the SM_GC_NO_SPACE XAPI alert for this SR.

        If some VDIs could not be coalesced for lack of space, ensure a
        single alert message exists (recreating it if the user dismissed
        it), record its id in the SR's sm_config and on each candidate
        VDI; otherwise destroy any stale message.
        """
        xapi_session = self.xapi.getSession()

        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace the recorded message id with the fresh one.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            # Tag every starved VDI so the alert can be cleared per-VDI later.
            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.message.destroy(msg_id)
2088 def clear_no_space_msg(self, vdi):
2089 msg_id = None
2090 try:
2091 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2092 except XenAPI.Failure:
2093 pass
2095 self.no_space_candidates.pop(vdi.uuid, None)
2096 if msg_id is not None: 2096 ↛ exitline 2096 didn't return from function 'clear_no_space_msg', because the condition on line 2096 was never false
2097 vdi.delConfig(VDI.DB_GC_NO_SPACE)
2100 def wait_for_plug(self):
2101 for _ in range(1, 10):
2102 time.sleep(2)
2103 if self.xapi.isPluggedHere():
2104 return True
2105 return False
2107 def gcEnabled(self, refresh=True):
2108 if refresh:
2109 self.xapi.srRecord = \
2110 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2111 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2112 Util.log("GC is disabled for this SR, abort")
2113 return False
2114 return True
    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist"""
        # Abstract: implemented by FileSR / LVMSR / LinstorSR.
        pass

    def scanLocked(self, force=False):
        """Run scan() while holding the SR lock."""
        self.lock()
        try:
            self.scan(force)
        finally:
            self.unlock()
2128 def getVDI(self, uuid):
2129 return self.vdis.get(uuid)
2131 def hasWork(self):
2132 if len(self.findGarbage()) > 0:
2133 return True
2134 if self.findCoalesceable():
2135 return True
2136 if self.findLeafCoalesceable():
2137 return True
2138 if self.needUpdateBlockInfo():
2139 return True
2140 return False
    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            # NOTE(review): returns the empty list here (falsy) rather than
            # None as documented; callers appear to test truthiness only.
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        for h in heights:
            for c in treeHeight[h]:
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    # Candidate fits: clear any pending no-space alert for it.
                    self.clear_no_space_msg(c)
                    return c
                else:
                    # Remember starved candidates for the no-space alert.
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                            (c, freeSpace))
        return None
2193 def getSwitch(self, key):
2194 return self.xapi.srRecord["other_config"].get(key)
2196 def forbiddenBySwitch(self, switch, condition, fail_msg):
2197 srSwitch = self.getSwitch(switch)
2198 ret = False
2199 if srSwitch:
2200 ret = srSwitch == condition
2202 if ret:
2203 Util.log(fail_msg)
2205 return ret
2207 def leafCoalesceForbidden(self):
2208 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2209 "false",
2210 "Coalesce disabled for this SR") or
2211 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2212 VDI.LEAFCLSC_DISABLED,
2213 "Leaf-coalesce disabled for this SR"))
    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each COW tree.

        Returns the first candidate that fits in the available free space
        (preferring snapshot-coalesce; falling back to the cheaper live
        leaf-coalesce cost when the VDI can be live-coalesced), or None.
        """

        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    # An offline (paused) leaf-coalesce would fit: flag the
                    # VDI so SM can do it offline later.
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None
    def gatherLeafCoalesceable(self, candidates):
        """Append to 'candidates' every VDI eligible for leaf-coalesce,
        filtering out recently failed, reset-on-boot, cached and
        explicitly disabled VDIs."""
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Unless globally enabled, require an explicit per-VDI "force".
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)
    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent"""
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            if isinstance(e, AbortException):
                # Abort is fatal for this run: clean up and propagate.
                self.cleanup()
                raise
            else:
                # Any other failure is non-fatal: remember the VDI so we
                # don't retry it in this run, and carry on.
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        # Always clean up temporary state (e.g. activated LVs) afterwards.
        self.cleanup()
    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent"""
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # Re-resolve the (possibly renamed/deleted) VDI and clear
                # its leaf-coalesce control switch.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            # Non-fatal failure: skip this VDI for the rest of the run.
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
            self.cleanup()
2314 def garbageCollect(self, dryRun=False):
2315 vdiList = self.findGarbage()
2316 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2317 for vdi in vdiList:
2318 Util.log(" %s" % vdi)
2319 if not dryRun:
2320 self.deleteVDIs(vdiList)
2321 self.cleanupJournals(dryRun)
2323 def findGarbage(self):
2324 vdiList = []
2325 for vdi in self.vdiTrees:
2326 vdiList.extend(vdi.getAllPrunable())
2327 return vdiList
    def deleteVDIs(self, vdiList) -> None:
        """Delete each VDI in 'vdiList', honouring the abort flag."""
        for vdi in vdiList:
            if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Deleting unlinked VDI %s" % vdi)
            self.deleteVDI(vdi)

    def deleteVDI(self, vdi) -> None:
        """Unhook a childless VDI from the tree and destroy its image."""
        assert(len(vdi.children) == 0)
        del self.vdis[vdi.uuid]
        if vdi.parent:
            vdi.parent.children.remove(vdi)
        if vdi in self.vdiTrees:
            self.vdiTrees.remove(vdi)
        vdi.delete()

    def forgetVDI(self, vdiUuid) -> None:
        # Remove the VDI record from the XAPI database.
        self.xapi.forgetVDI(self.uuid, vdiUuid)
2348 def pauseVDIs(self, vdiList) -> None:
2349 paused = []
2350 failed = False
2351 for vdi in vdiList:
2352 try:
2353 vdi.pause()
2354 paused.append(vdi)
2355 except:
2356 Util.logException("pauseVDIs")
2357 failed = True
2358 break
2360 if failed:
2361 self.unpauseVDIs(paused)
2362 raise util.SMException("Failed to pause VDIs")
2364 def unpauseVDIs(self, vdiList):
2365 failed = False
2366 for vdi in vdiList:
2367 try:
2368 vdi.unpause()
2369 except:
2370 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2371 failed = True
2372 if failed:
2373 raise util.SMException("Failed to unpause VDIs")
    def getFreeSpace(self) -> int:
        """Free space in bytes; the base SR reports none (overridden)."""
        return 0

    def cleanup(self):
        # Hook for subclasses; the base SR has nothing to clean up.
        Util.log("In cleanup")
        return

    @override
    def __str__(self) -> str:
        """Short uuid prefix, plus the human-readable name when known."""
        if self.name:
            ret = "%s ('%s')" % (self.uuid[0:4], self.name)
        else:
            ret = "%s" % self.uuid
        return ret
    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # Locking was disabled at construction time.
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # Poll instead of blocking so we can notice an abort request.
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # Already held by us: just bump the nesting count.
        self._locked += 1

    def unlock(self):
        """Release one nesting level; drop the real lock at zero."""
        if not self._srLock:
            return
        assert(self._locked > 0)
        self._locked -= 1
        if self._locked == 0:
            self._srLock.release()
2418 def needUpdateBlockInfo(self) -> bool:
2419 for vdi in self.vdis.values():
2420 if vdi.scanError or len(vdi.children) == 0:
2421 continue
2422 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2423 return True
2424 return False
2426 def updateBlockInfo(self) -> None:
2427 for vdi in self.vdis.values():
2428 if vdi.scanError or len(vdi.children) == 0:
2429 continue
2430 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2431 vdi.updateBlockInfo()
2433 def cleanupCoalesceJournals(self):
2434 """Remove stale coalesce VDI indicators"""
2435 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2436 for uuid, jval in entries.items():
2437 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2439 def cleanupJournals(self, dryRun=False):
2440 """delete journal entries for non-existing VDIs"""
2441 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2442 entries = self.journaler.getAll(t)
2443 for uuid, jval in entries.items():
2444 if self.getVDI(uuid):
2445 continue
2446 if t == SR.JRN_CLONE:
2447 baseUuid, clonUuid = jval.split("_")
2448 if self.getVDI(baseUuid):
2449 continue
2450 Util.log(" Deleting stale '%s' journal entry for %s "
2451 "(%s)" % (t, uuid, jval))
2452 if not dryRun:
2453 self.journaler.remove(t, uuid)
    def cleanupCache(self, maxAge=-1) -> int:
        """Delete local cache files; returns the number removed (base: 0)."""
        return 0

    def _hasLeavesAttachedOn(self, vdi: VDI):
        """Return the hosts that have any leaf of vdi's subtree attached."""
        leaves = vdi.getAllLeaves()
        leaves_vdi = [leaf.uuid for leaf in leaves]
        return util.get_hosts_attached_on(self.xapi.session, leaves_vdi)

    def _gc_running_file(self, vdi: VDI):
        # Marker-file path signalling "GC is coalescing this VDI".
        run_file = "gc_running_{}".format(vdi.uuid)
        return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file)

    def _create_running_file(self, vdi: VDI):
        # Create the marker file (content is irrelevant, presence matters).
        with open(self._gc_running_file(vdi), "w") as f:
            f.write("1")

    def _delete_running_file(self, vdi: VDI):
        os.unlink(self._gc_running_file(vdi))
    def _coalesce(self, vdi: VDI):
        """Coalesce 'vdi' into its parent, then relink vdi's children.

        Journals make the operation resumable: JRN_COALESCE marks the
        data-copy phase, JRN_RELINK the child-relinking phase.
        """
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the COW coalesce step is done, because we
            # don't expect the rest of the process to take long

            if os.path.exists(self._gc_running_file(vdi)):
                util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))

            self._create_running_file(vdi)

            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            host_refs = self._hasLeavesAttachedOn(vdi)
            #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
            if len(host_refs) > 1:
                util.SMlog("Not coalesceable, chain activated more than once")
                raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error

            try:
                if host_refs and vdi.cowutil.isCoalesceableOnRemote():
                    #Leaf opened on another host, we need to call online coalesce
                    util.SMlog("Remote coalesce for {}".format(vdi.path))
                    vdi._doCoalesceOnHost(list(host_refs)[0])
                else:
                    util.SMlog("Offline coalesce for {}".format(vdi.path))
                    vdi._doCoalesce()
            except Exception as e:
                # Remove the marker so a later run does not see a stale one.
                util.SMlog("EXCEPTION while coalescing: {}".format(e))
                self._delete_running_file(vdi)
                raise

            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
            self._delete_running_file(vdi)

        util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

        # we now need to relink the children: lock the SR to prevent ops
        # like SM.clone from manipulating the VDIs we'll be relinking and
        # rescan the SR first in case the children changed since the last
        # scan
        self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues
        finally:
            self.unlock()
        # Reload the children to leave things consistent
        vdi.parent._reloadChildren(vdi)
        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)

        # The coalesced VDI is now redundant: delete it.
        self.deleteVDI(vdi)
2534 class CoalesceTracker:
2535 GRACE_ITERATIONS = 2
2536 MAX_ITERATIONS_NO_PROGRESS = 3
2537 MAX_ITERATIONS = 10
2538 MAX_INCREASE_FROM_MINIMUM = 1.2
2539 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2540 " --> Final size {finSize}"
2542 def __init__(self, sr):
2543 self.itsNoProgress = 0
2544 self.its = 0
2545 self.minSize = float("inf")
2546 self.history = []
2547 self.reason = ""
2548 self.startSize = None
2549 self.finishSize = None
2550 self.sr = sr
2551 self.grace_remaining = self.GRACE_ITERATIONS
2553 def abortCoalesce(self, prevSize, curSize):
2554 self.its += 1
2555 self.history.append(self.HISTORY_STRING.format(its=self.its,
2556 initSize=prevSize,
2557 finSize=curSize))
2559 self.finishSize = curSize
2561 if self.startSize is None:
2562 self.startSize = prevSize
2564 if curSize < self.minSize:
2565 self.minSize = curSize
2567 if prevSize < self.minSize:
2568 self.minSize = prevSize
2570 if self.its == 1:
2571 # Skip evaluating conditions on first iteration
2572 return False
2574 if prevSize < curSize:
2575 self.itsNoProgress += 1
2576 Util.log("No progress, attempt:"
2577 " {attempt}".format(attempt=self.itsNoProgress))
2578 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2579 else:
2580 # We made progress
2581 return False
2583 if self.its > self.MAX_ITERATIONS:
2584 max = self.MAX_ITERATIONS
2585 self.reason = \
2586 "Max iterations ({max}) exceeded".format(max=max)
2587 return True
2589 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2590 max = self.MAX_ITERATIONS_NO_PROGRESS
2591 self.reason = \
2592 "No progress made for {max} iterations".format(max=max)
2593 return True
2595 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2596 if curSize > maxSizeFromMin:
2597 self.grace_remaining -= 1
2598 if self.grace_remaining == 0:
2599 self.reason = "Unexpected bump in size," \
2600 " compared to minimum achieved"
2602 return True
2604 return False
2606 def printSizes(self):
2607 Util.log("Starting size was {size}"
2608 .format(size=self.startSize))
2609 Util.log("Final size was {size}"
2610 .format(size=self.finishSize))
2611 Util.log("Minimum size achieved was {size}"
2612 .format(size=self.minSize))
2614 def printReasoning(self):
2615 Util.log("Aborted coalesce")
2616 for hist in self.history:
2617 Util.log(hist)
2618 Util.log(self.reason)
2619 self.printSizes()
2621 def printSummary(self):
2622 if self.its == 0:
2623 return
2625 if self.reason: 2625 ↛ 2626line 2625 didn't jump to line 2626, because the condition on line 2625 was never true
2626 Util.log("Aborted coalesce")
2627 Util.log(self.reason)
2628 else:
2629 Util.log("Coalesce summary")
2631 Util.log(f"Performed {self.its} iterations")
2632 self.printSizes()
    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # Keep snapshot-coalescing until the leaf is small enough to be
        # live-coalesced within acceptable downtime.
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizePhys = vdi.getSizePhys()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)
2651 def calcStorageSpeed(self, startTime, endTime, coalescedSize):
2652 speed = None
2653 total_time = endTime - startTime
2654 if total_time > 0:
2655 speed = float(coalescedSize) / float(total_time)
2656 return speed
2658 def writeSpeedToFile(self, speed):
2659 content = []
2660 speedFile = None
2661 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2662 self.lock()
2663 try:
2664 Util.log("Writing to file: {myfile}".format(myfile=path))
2665 lines = ""
2666 if not os.path.isfile(path):
2667 lines = str(speed) + "\n"
2668 else:
2669 speedFile = open(path, "r+")
2670 content = speedFile.readlines()
2671 content.append(str(speed) + "\n")
2672 if len(content) > N_RUNNING_AVERAGE:
2673 del content[0]
2674 lines = "".join(content)
2676 util.atomicFileWrite(path, VAR_RUN, lines)
2677 finally:
2678 if speedFile is not None:
2679 speedFile.close()
2680 Util.log("Closing file: {myfile}".format(myfile=path))
2681 self.unlock()
2683 def recordStorageSpeed(self, startTime, endTime, coalescedSize):
2684 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize)
2685 if speed is None:
2686 return
2688 self.writeSpeedToFile(speed)
2690 def getStorageSpeed(self):
2691 speedFile = None
2692 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2693 self.lock()
2694 try:
2695 speed = None
2696 if os.path.isfile(path):
2697 speedFile = open(path)
2698 content = speedFile.readlines()
2699 try:
2700 content = [float(i) for i in content]
2701 except ValueError:
2702 Util.log("Something bad in the speed log:{log}".
2703 format(log=speedFile.readlines()))
2704 return speed
2706 if len(content):
2707 speed = sum(content) / float(len(content))
2708 if speed <= 0: 2708 ↛ 2710line 2708 didn't jump to line 2710, because the condition on line 2708 was never true
2709 # Defensive, should be impossible.
2710 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2711 format(speed=speed, uuid=self.uuid))
2712 speed = None
2713 else:
2714 Util.log("Speed file empty for SR: {uuid}".
2715 format(uuid=self.uuid))
2716 else:
2717 Util.log("Speed log missing for SR: {uuid}".
2718 format(uuid=self.uuid))
2719 return speed
2720 finally:
2721 if not (speedFile is None):
2722 speedFile.close()
2723 self.unlock()
2725 def _snapshotCoalesce(self, vdi):
2726 # Note that because we are not holding any locks here, concurrent SM
2727 # operations may change this tree under our feet. In particular, vdi
2728 # can be deleted, or it can be snapshotted.
2729 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2730 Util.log("Single-snapshotting %s" % vdi)
2731 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2732 try:
2733 ret = self.xapi.singleSnapshotVDI(vdi)
2734 Util.log("Single-snapshot returned: %s" % ret)
2735 except XenAPI.Failure as e:
2736 if util.isInvalidVDI(e):
2737 Util.log("The VDI appears to have been concurrently deleted")
2738 return False
2739 raise
2740 self.scanLocked()
2741 tempSnap = vdi.parent
2742 if not tempSnap.isCoalesceable():
2743 Util.log("The VDI appears to have been concurrently snapshotted")
2744 return False
2745 Util.log("Coalescing parent %s" % tempSnap)
2746 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2747 sizePhys = vdi.getSizePhys()
2748 self._coalesce(tempSnap)
2749 if not vdi.isLeafCoalesceable():
2750 Util.log("The VDI tree appears to have been altered since")
2751 return False
2752 return True
    def _liveLeafCoalesce(self, vdi: VDI) -> bool:
        """Pause 'vdi' and coalesce it into its parent "atomically".

        Returns False when the VDI disappeared or stopped being
        leaf-coalesceable before we took the SR lock.
        """
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            # Re-scan under the lock: the tree may have changed meanwhile.
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._create_running_file(vdi)
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    # Revert or finish the interrupted operation before
                    # propagating the error.
                    self._handleInterruptedCoalesceLeaf()
                    raise
                finally:
                    # Re-resolve the VDI: after a successful coalesce the
                    # surviving node carries this uuid.
                    vdi = self.getVDI(uuid)
                    if vdi:
                        vdi.ensureUnpaused()
                        self._delete_running_file(vdi)
                    vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                    if vdiOld:
                        util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                        self.deleteVDI(vdiOld)
                        util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
            finally:
                self.cleanup()
                self.unlock()
                self.logFilter.logState()
            return True
        finally:
            pass
    def _doCoalesceLeaf(self, vdi: VDI):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # JRN_LEAF lets an interrupted operation be reverted or finished.
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceCowImage(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename: the parent takes over the leaf's identity so attached
        # clients keep working.
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        vdi.parent.delConfig(VDI.DB_VDI_PARENT)
        if vdi.parent.vdi_type == VdiType.RAW:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
        vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        # The old parent's XAPI record is now obsolete.
        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)
2857 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2858 assert(VdiType.isCowImage(parent.vdi_type))
2859 extra = child.getSizePhys() - parent.getSizePhys()
2860 if extra < 0:
2861 extra = 0
2862 return extra
    def _prepareCoalesceLeaf(self, vdi) -> None:
        # Subclass hook: run before a leaf-coalesce starts.
        pass

    def _updateNode(self, vdi) -> None:
        # Subclass hook: update backend metadata after the renames.
        pass

    def _finishCoalesceLeaf(self, parent) -> None:
        # Subclass hook: run after a leaf-coalesce completes.
        pass

    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        # Subclass hook: refresh slave hosts after undoing a leaf-coalesce.
        pass

    def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
        # Subclass hook: refresh slave hosts after a VDI rename.
        pass

    def _updateSlavesOnResize(self, vdi) -> None:
        # Subclass hook: refresh slave hosts after a VDI resize.
        pass

    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Drop VDI objects that vanished from the SR since the last scan."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]

    def _handleInterruptedCoalesceLeaf(self) -> None:
        """An interrupted leaf-coalesce operation may leave the COW tree in an
        inconsistent state. If the old-leaf VDI is still present, we revert the
        operation (in case the original error is persistent); otherwise we must
        finish the operation"""
        pass
    def _buildTree(self, force):
        """Rebuild vdiTrees (the list of root VDIs) from parentUuid links.

        With 'force', a missing parent is logged and the orphan becomes a
        root; without it, a missing parent raises util.SMException.
        Temporarily-renamed leaves (TMP_RENAME_PREFIX) are always treated
        as roots since their parent link is transient.
        """
        self.vdiTrees = []
        for vdi in self.vdis.values():
            if vdi.parentUuid:
                parent = self.getVDI(vdi.parentUuid)
                if not parent:
                    if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
                        self.vdiTrees.append(vdi)
                        continue
                    if force:
                        Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
                                (vdi.parentUuid, vdi.uuid))
                        self.vdiTrees.append(vdi)
                        continue
                    else:
                        raise util.SMException("Parent VDI %s of %s not " \
                                "found" % (vdi.parentUuid, vdi.uuid))
                vdi.parent = parent
                parent.children.append(vdi)
            else:
                # No parent: this VDI is a tree root.
                self.vdiTrees.append(vdi)
class FileSR(SR):
    """SR backend for file-based SRs: each image is a file under the SR
    mount point."""
    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        # Images live directly under the SR mount point.
        self.path = "/var/run/sr-mount/%s" % self.uuid
        self.journaler = fjournaler.Journaler(self.path)

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the image files on disk.

        Scans every supported COW image type, then picks up RAW images by
        file extension, drops records for files that disappeared, rebuilds
        the VDI tree and recovers any interrupted leaf-coalesce.

        Raises util.SMException if the SR mount directory is missing.
        """
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)

        uuidsPresent: List[str] = []

        for vdi_type in VDI_COW_TYPES:
            scan_result = self._scan(vdi_type, force)
            for uuid, image_info in scan_result.items():
                vdi = self.getVDI(uuid)
                if not vdi:
                    self.logFilter.logNewVDI(uuid)
                    vdi = FileVDI(self, uuid, vdi_type)
                    self.vdis[uuid] = vdi
                vdi.load(image_info)
            uuidsPresent.extend(scan_result.keys())

        # RAW images carry no COW metadata: mere presence on disk is enough.
        rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, VdiType.RAW)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def getFreeSpace(self) -> int:
        """Free bytes on the filesystem backing the SR."""
        return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)

    @override
    def deleteVDIs(self, vdiList) -> None:
        """Delete the given VDIs; on NFS SRs, mark cache SRs dirty if a tree
        root was among them (its caches are now stale)."""
        rootDeleted = False
        for vdi in vdiList:
            if not vdi.parent:
                rootDeleted = True
                break
        SR.deleteVDIs(self, vdiList)
        if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
            self.xapi.markCacheSRsDirty()

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                # Parent-node cache: age it out based on last access time.
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # Serialize with cache setup: lock on the leaf uuid, or on the
            # parent uuid for managed leaves that record a vhd-parent.
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

    def _cleanupCache(self, uuid, action):
        """Remove one cache file (caller holds the cache-setup lock).

        Returns True if the file was deleted, False if it turned out to be
        valid again or is still in use by a tapdisk.
        """
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

    def _isCacheFileName(self, name):
        """True if `name` looks like '<uuid><CACHE_FILE_EXT>'."""
        return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
                name.endswith(self.CACHE_FILE_EXT)

    def _scan(self, vdi_type, force):
        """Scan the SR directory for images of `vdi_type`, retrying on scan
        errors; with `force` the last (possibly bad) result is returned,
        otherwise util.SMException is raised."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
            scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
            for uuid, vdiInfo in scan_result.items():
                if vdiInfo.error:
                    error = True
                    break
            if not error:
                return scan_result
            Util.log("Scan error on attempt %d" % i)
        if force:
            return scan_result
        raise util.SMException("Scan error")

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no slave host still uses it."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    def _checkSlaves(self, vdi):
        """Ask every host with an attached PBD (except us) to confirm `vdi`
        is not in use; failures from offline hosts are ignored."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except util.CommandException:
                # Only propagate failures from hosts believed to be online.
                if hostRef in onlineHosts:
                    raise

    def _checkSlave(self, hostRef, vdi):
        """Run the nfs-on-slave 'check' plugin for vdi.path on one host;
        raises on failure."""
        call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
        Util.log("Checking with slave: %s" % repr(call))
        _host = self.xapi.session.xenapi.host
        # Result text is unused; we only care whether the call raises.
        text = _host.call_plugin( * call)

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover from leaf-coalesce operations interrupted mid-way, using
        the on-disk journal: undo if the parent (or the temporarily renamed
        child) still exists, otherwise finish the operation."""
        # NOTE(review): names are built with the .VHD extension only, while
        # scan() handles all VDI_COW_TYPES — confirm other COW types cannot
        # have leaf-coalesce journal entries on file SRs.
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            childName = uuid + VdiTypeExtension.VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
            parentName1 = parentUuid + VdiTypeExtension.VHD
            parentName2 = parentUuid + VdiTypeExtension.RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: rename parent and child back,
        restore the VDI record and the hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # Parent may still carry the child's uuid if the rename happened.
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # Restore visibility: child is the live leaf, parent is hidden.
        if child.hidden:
            child._setHidden(False)
        if not parent.hidden:
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data move already finished: forget
        the defunct parent and notify slaves of the resize."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Parent may already be gone from XAPI; that's fine.
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")
class LVMSR(SR):
    """SR backend for LVM-based (LVHD) SRs: each image is an LV inside the
    SR's volume group."""
    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
        self.path = os.path.join(VG_LOCATION, self.vgName)

        # Optional per-SR LVM configuration is carried in SR other-config.
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI: deactivate its LV locally if we activated it, then
        confirm with slaves before the actual delete."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in XAPI and remove it from the on-disk SR metadata
        volume to keep the two in sync."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

    @override
    def getFreeSpace(self) -> int:
        """Unallocated bytes in the volume group."""
        stats = lvutil._getVGstats(self.vgName)
        return stats['physical_size'] - stats['physical_utilisation']

    @override
    def cleanup(self):
        """Deactivate every LV this GC run activated (best effort)."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

    @override
    def needUpdateBlockInfo(self) -> bool:
        """True if any scannable COW parent node lacks its cached
        block-allocation info (DB_VDI_BLOCKS)."""
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                return True
        return False

    @override
    def updateBlockInfo(self) -> None:
        """Populate missing DB_VDI_BLOCKS info on COW parent nodes."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the LVs in the VG, drop stale records,
        rebuild the tree and recover interrupted leaf-coalesces."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    def _scan(self, force):
        """Scan the VG, retrying on per-VDI scan errors; with `force` the
        last (possibly bad) result is returned, else raise."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Forget records for LVs that vanished; also drop any activation
        bookkeeping held for them."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain, make the parent LV writable and size the LVs
        for the upcoming leaf-coalesce."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

    @override
    def _updateNode(self, vdi) -> None:
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Right-size the surviving node: keep it fully inflated only if it
        may still be written to."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Bytes the parent LV must grow by to hold the fully-inflated
        coalesced image."""
        return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover journalled leaf-coalesces: undo if the parent LV (or the
        temporarily renamed child LV) still exists, else finish."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            undo = False
            for prefix in LV_PREFIX.values():
                parentLV = prefix + parentUuid
                undo = self.lvmCache.checkLV(parentLV)
                if undo:
                    break

            if not undo:
                for prefix in LV_PREFIX.values():
                    tmpChildLV = prefix + uuid
                    undo = self.lvmCache.checkLV(tmpChildLV)
                    if undo:
                        break

            if undo:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: undo renames, restore refcounts,
        LV sizes, hidden flags and read-only state."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

            # refcount (best effort - assume that it had succeeded if the
            # second rename succeeded; if not, this adjustment will be wrong,
            # leading to a non-deactivation of the LV)
            ns = NS_PREFIX_LVM + self.uuid
            cCnt, cBcnt = RefCounter.check(child.uuid, ns)
            pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
            pCnt = pCnt + cBcnt
            RefCounter.set(parent.uuid, pCnt, 0, ns)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.hidden:
            child._setHidden(False)
        if not parent.hidden:
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data move already finished:
        re-inflate the survivor, forget the defunct parent, notify slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Parent may already be gone from XAPI; that's fine.
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """Refresh the LVs on every slave that has the child attached, after
        a leaf-coalesce undo renamed them back."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        # NOTE(review): LV names elsewhere are formed via LV_PREFIX[vdi_type];
        # confirm using the raw vdi_type string as prefix here is intended.
        tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """Deactivate the old LV name and refresh the new one on every slave
        that has the renamed VDI attached."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """Refresh the resized LV on every slave with one of its leaves
        attached, so they see the new size."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
                                         vdi.fileName, vdi.uuid, slaves)
class LinstorSR(SR):
    """SR backend for LINSTOR/DRBD SRs; requires the optional LINSTOR
    libraries to be importable."""
    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH

        # Proxy that forwards attribute access to self._linstor, which is
        # only created by a full _reloadLinstor() (journaler_only=False).
        class LinstorProxy:
            def __init__(self, sr: LinstorSR) -> None:
                self.sr = sr

            def __getattr__(self, attr: str) -> Any:
                assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
                return getattr(self.sr._linstor, attr)

        self._linstor_proxy = LinstorProxy(self)
        self._reloadLinstor(journaler_only=True)

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no other host holds it open."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def getFreeSpace(self) -> int:
        return self._linstor.max_volume_size_allowed

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from LINSTOR volume info, drop stale records,
        rebuild the tree and recover interrupted leaf-coalesces."""
        all_vdi_info = self._scan(force)
        for uuid, vdiInfo in all_vdi_info.items():
            # When vdiInfo is None, the VDI is RAW.
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LinstorVDI(self, uuid, vdiInfo.vdiType)
                self.vdis[uuid] = vdi
            if vdiInfo:
                vdi.load(vdiInfo)
        self._removeStaleVDIs(all_vdi_info.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def pauseVDIs(self, vdiList) -> None:
        """Pause VDIs only once LINSTOR confirms none of them is locked."""
        self._linstor.ensure_volume_list_is_not_locked(
            vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorSR, self).pauseVDIs(vdiList)

    def _reloadLinstor(self, journaler_only=False):
        """(Re)connect to the LINSTOR controller via this host's PBD config;
        builds the journaler and, unless journaler_only, the volume manager."""
        session = self.xapi.session
        host_ref = util.get_this_host_ref(session)
        sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)

        pbd = util.find_my_pbd(session, host_ref, sr_ref)
        if pbd is None:
            raise util.SMException('Failed to find PBD')

        dconf = session.xenapi.PBD.get_device_config(pbd)
        group_name = dconf['group-name']

        controller_uri = get_controller_uri()
        self.journaler = LinstorJournaler(
            controller_uri, group_name, logger=util.SMlog
        )

        if journaler_only:
            return

        self._linstor = LinstorVolumeManager(
            controller_uri,
            group_name,
            repair=True,
            logger=util.SMlog
        )

    def _scan(self, force):
        """Load VDI info with retries; with `force` the last result is
        returned even if it contained errors."""
        # NOTE(review): if every attempt raises before all_vdi_info is
        # assigned, the `return all_vdi_info` under `force` hits an unbound
        # local — confirm whether that path needs a fallback.
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            self._reloadLinstor()
            error = False
            try:
                all_vdi_info = self._load_vdi_info()
                for uuid, vdiInfo in all_vdi_info.items():
                    if vdiInfo and vdiInfo.error:
                        error = True
                        break
                if not error:
                    return all_vdi_info
                Util.log('Scan error, retrying ({})'.format(i))
            except Exception as e:
                Util.log('Scan exception, retrying ({}): {}'.format(i, e))
                Util.log(traceback.format_exc())

        if force:
            return all_vdi_info
        raise util.SMException('Scan error')

    def _load_vdi_info(self):
        """Build {vdi_uuid: CowImageInfo|None} from LINSTOR volumes; None
        means RAW, an entry with .error set means the scan of it failed."""
        all_vdi_info = {}

        # TODO: Ensure metadata contains the right info.

        all_volume_info = self._linstor.get_volumes_with_info()
        volumes_metadata = self._linstor.get_volumes_with_metadata()
        for vdi_uuid, volume_info in all_volume_info.items():
            try:
                volume_metadata = volumes_metadata[vdi_uuid]
                if not volume_info.name and not list(volume_metadata.items()):
                    continue  # Ignore it, probably deleted.

                if vdi_uuid.startswith('DELETED_'):
                    # Assume it's really a RAW volume of a failed snap without COW header/footer.
                    # We must remove this VDI now without adding it in the VDI list.
                    # Otherwise `Relinking` calls and other actions can be launched on it.
                    # We don't want that...
                    Util.log('Deleting bad VDI {}'.format(vdi_uuid))

                    self.lock()
                    try:
                        self._linstor.destroy_volume(vdi_uuid)
                        try:
                            self.forgetVDI(vdi_uuid)
                        except:
                            # Best effort: it may not exist in XAPI at all.
                            pass
                    except Exception as e:
                        Util.log('Cannot delete bad VDI: {}'.format(e))
                    finally:
                        self.unlock()
                    continue

                vdi_type = volume_metadata.get(VDI_TYPE_TAG)
                volume_name = self._linstor.get_volume_name(vdi_uuid)
                if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
                    # Always RAW!
                    info = None
                elif VdiType.isCowImage(vdi_type):
                    info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid)
                else:
                    # Ensure it's not a COW image...
                    linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type)
                    try:
                        info = linstorcowutil.get_info(vdi_uuid)
                    except:
                        try:
                            # Try once more after attempting an image repair.
                            linstorcowutil.force_repair(
                                self._linstor.get_device_path(vdi_uuid)
                            )
                            info = linstorcowutil.get_info(vdi_uuid)
                        except:
                            info = None

            except Exception as e:
                Util.log(
                    ' [VDI {}: failed to load VDI info]: {}'
                    .format(vdi_uuid, e)
                )
                info = CowImageInfo(vdi_uuid)
                info.error = 1

            all_vdi_info[vdi_uuid] = info

        return all_vdi_info

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and size the volumes for leaf-coalesce."""
        vdi._activateChain()
        vdi.deflate()
        vdi._inflateParentForCoalesce()

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Right-size the surviving node: keep it fully inflated only if it
        may still be written to."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Bytes the parent DRBD volume must grow by for the coalesce."""
        return LinstorCowUtil(
            self.xapi.session, self._linstor, parent.vdi_type
        ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()

    def _hasValidDevicePath(self, uuid):
        """True if LINSTOR can resolve a device path for this uuid."""
        try:
            self._linstor.get_device_path(uuid)
        except Exception:
            # TODO: Maybe log exception.
            return False
        return True

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """Run the generic live leaf-coalesce under the SR lock, after
        ensuring the volume is not locked by LINSTOR."""
        self.lock()
        try:
            self._linstor.ensure_volume_is_not_locked(
                vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
            )
            return super(LinstorSR, self)._liveLeafCoalesce(vdi)
        finally:
            self.unlock()

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover journalled leaf-coalesces: undo if the parent (or the
        temporarily renamed child) still has a device path, else finish."""
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: undo the renames, restore the
        VDI record and hidden flags, and refresh slaves."""
        Util.log('*** UNDO LEAF-COALESCE')
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)

        # TODO: Maybe deflate here.

        if child.hidden:
            child._setHidden(False)
        if not parent.hidden:
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data move already finished: forget
        the defunct parent and notify slaves of the resize."""
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Parent may already be gone from XAPI; that's fine.
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

    def _checkSlaves(self, vdi):
        """Raise if any host has the volume open by something other than
        tapdisk; a non-existent volume is treated as not in use."""
        try:
            all_openers = self._linstor.get_volume_openers(vdi.uuid)
            for openers in all_openers.values():
                for opener in openers.values():
                    if opener['process-name'] != 'tapdisk':
                        raise util.SMException(
                            'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
                        )
        except LinstorVolumeManagerError as e:
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                raise
3780################################################################################
3781#
3782# Helpers
3783#
def daemonize():
    """Detach from the calling process using the classic double-fork.

    Returns False in the original (parent) process and True in the final
    daemon process; the intermediate child exits via os._exit. Standard
    fds are re-pointed at /dev/null and process-local lock state is reset.
    """
    pid = os.fork()
    if pid:
        # Parent: reap the intermediate child and report its pid.
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    os.chdir("/")
    os.setsid()
    # Second fork so the daemon cannot reacquire a controlling terminal.
    pid = os.fork()
    if pid:
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True
def normalizeType(type):
    """Collapse a concrete SR driver type into one of the generic GC type
    constants (SR.TYPE_*); raises util.SMException for unknown types."""
    lvhdAliases = ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]
    fileTypes = [
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock"
    ]
    # temporary while LVHD is symlinked as LVM
    if type in LVMSR.SUBTYPES or type in lvhdAliases:
        type = SR.TYPE_LVHD
    if type in fileTypes:
        type = SR.TYPE_FILE
    if type == "linstor":
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type
# Default GC quiet-period before real work starts, in seconds (5 minutes).
GCPAUSE_DEFAULT_SLEEP = 5 * 60
def _gc_init_file(sr_uuid):
    """Return the path of the per-SR 'gc_init' marker file."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')
def _create_init_file(sr_uuid):
    """Create the per-SR 'gc_init' marker file (and its directory).

    The marker's existence is what enables the GC quiet-period sleep in
    _gcLoopPause on subsequent runs.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    # _gc_init_file already returns the complete path; the original wrapped
    # it in a redundant single-argument os.path.join().
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')
def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Optionally wait before GC work begins.

    Skipped entirely when `immediate` is set. Otherwise, if the GCPAUSE
    fistpoint is armed we defer to it; else, once the SR's gc_init marker
    exists, sleep for an abortable quiet period.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.

    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):

        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
    elif os.path.exists(_gc_init_file(sr.uuid)):
        def abortTest():
            return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

        # If time.sleep hangs we are in deep trouble, however for
        # completeness we set the timeout of the abort thread to
        # 110% of GCPAUSE_DEFAULT_SLEEP.
        Util.log("GC active, about to go quiet")
        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                          None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                          GCPAUSE_DEFAULT_SLEEP * 1.1)
        Util.log("GC active, quiet period ended")
def _gcLoop(sr, dryRun=False, immediate=False):
    """Main GC loop: repeatedly scan the SR, collect garbage and coalesce
    until no work remains, the SR is unplugged, GC is disabled, or SIGTERM
    is received.

    Holds lockGCActive for the whole run and lockGCRunning around each
    individual unit of work.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Fix: the original returned early (SR unplugged) or raised (from
    # cleanupCache) before entering any try/finally, leaking lockGCActive.
    # Everything after a successful acquire is now covered by the release.
    try:
        # Check we're still attached after acquiring locks
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return

        # Clean up Intellicache files
        sr.cleanupCache()

        # Track how many we do
        coalesced = 0
        task_status = "success"
        try:
            # Check if any work needs to be done
            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                return
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                return
            sr.xapi.create_task(
                "Garbage Collection",
                "Garbage collection for SR %s" % sr.uuid)
            _gcLoopPause(sr, dryRun, immediate=immediate)
            while True:
                if SIGTERM:
                    Util.log("Term requested")
                    return

                if not sr.xapi.isPluggedHere():
                    Util.log("SR no longer attached, exiting")
                    break
                sr.scanLocked()
                if not sr.hasWork():
                    Util.log("No work, exiting")
                    break

                if not lockGCRunning.acquireNoblock():
                    Util.log("Unable to acquire GC running lock.")
                    return
                try:
                    if not sr.gcEnabled():
                        break

                    sr.xapi.update_task_progress("done", coalesced)

                    sr.cleanupCoalesceJournals()
                    # Create the init file here in case startup is waiting on it
                    _create_init_file(sr.uuid)
                    sr.scanLocked()
                    sr.updateBlockInfo()

                    howmany = len(sr.findGarbage())
                    if howmany > 0:
                        Util.log("Found %d orphaned vdis" % howmany)
                        sr.lock()
                        try:
                            sr.garbageCollect(dryRun)
                        finally:
                            sr.unlock()
                        sr.xapi.srUpdate()

                    # Non-leaf coalesce takes priority; restart the scan loop
                    # after each successful coalesce.
                    candidate = sr.findCoalesceable()
                    if candidate:
                        util.fistpoint.activate(
                            "LVHDRT_finding_a_suitable_pair", sr.uuid)
                        sr.coalesce(candidate, dryRun)
                        sr.xapi.srUpdate()
                        coalesced += 1
                        continue

                    candidate = sr.findLeafCoalesceable()
                    if candidate:
                        sr.coalesceLeaf(candidate, dryRun)
                        sr.xapi.srUpdate()
                        coalesced += 1
                        continue
                finally:
                    lockGCRunning.release()
        except:
            task_status = "failure"
            raise
        finally:
            sr.xapi.set_task_status(task_status)
            Util.log("GC process exiting, no work left")
            _create_init_file(sr.uuid)
    finally:
        lockGCActive.release()
3960def _xapi_enabled(session, hostref):
3961 host = session.xenapi.host.get_record(hostref)
3962 return host['enabled']
def _ensure_xapi_initialised(session):
    """
    Don't want to start GC until Xapi is fully initialised: poll every 15s
    until the local host is reported enabled. Opens (and later closes) a
    local API session when none was supplied.
    """
    local_session = None
    if session is None:
        session = local_session = util.get_localAPI_session()

    try:
        host_ref = session.xenapi.host.get_by_uuid(util.get_this_host())
        while not _xapi_enabled(session, host_ref):
            util.SMlog("Xapi not ready, GC waiting")
            time.sleep(15)
    finally:
        # Only log out sessions we opened ourselves
        if local_session is not None:
            local_session.xenapi.session.logout()
def _gc(session, srUuid, dryRun=False, immediate=False):
    """Top-level GC pass: initialise locks, wait for XAPI, run the loop.

    Does nothing when GC is disabled for the SR.
    """
    init(srUuid)
    _ensure_xapi_initialised(session)
    sr = SR.getInstance(srUuid, session)
    if sr.gcEnabled(False):
        try:
            _gcLoop(sr, dryRun, immediate=immediate)
        finally:
            sr.check_no_space_candidates()
            sr.cleanup()
            sr.logFilter.logState()
            del sr.xapi
def _abort(srUuid, soft=False):
    """Aborts an GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        # In soft mode, set() fails when an abort signal is already pending
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        # Poll for the running instance to notice the flag and drop its lock
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True
def init(srUuid):
    """Lazily create the module-level GC lock objects for the given SR."""
    global lockGCRunning, lockGCActive
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)
class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """
    def __init__(self, srUuid):
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock while briefly holding the SR lock.

        Returns the result of the non-blocking acquire; the SR lock is
        always released before returning."""
        self._srLock.acquire()

        try:
            return self._lock.acquireNoblock()
        finally:
            self._srLock.release()

    def release(self):
        """Drop the GC-active lock (the SR lock is not involved here)."""
        self._lock.release()
def usage():
    """Print CLI usage to stdout, log the invalid invocation, and exit(1)."""
    output = """Garbage collect and/or coalesce COW images in a COW-based SR

Parameters:
    -u --uuid UUID SR UUID
 and one of:
    -g --gc garbage collect, coalesce, and repeat while there is work
    -G --gc_force garbage collect once, aborting any current operations
    -c --cache-clean <max_age> clean up IntelliCache cache files older than
 max_age hours
    -a --abort abort any currently running operation (GC or coalesce)
    -q --query query the current state (GC'ing, coalescing or not running)
    -x --disable disable GC/coalesce (will be in effect until you exit)
    -t --debug see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force continue in the presence of COW images with errors (when doing
 GC, this might cause removal of any such images) (only valid
 for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes. ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
 "deflate".
    -v --vdi_uuid VDI UUID
    """
    #-d --dry-run don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)
4094##############################################################################
4095#
4096# API
4097#
def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.

    Returns True (after releasing lockGCActive, which _abort acquired) when
    the abort succeeded, False otherwise.
    """
    if not _abort(srUuid, soft):
        return False
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True
def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    # Allow a graceful shutdown request via SIGTERM (polled in _gcLoop)
    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        # daemonize() returns True only in the detached child; the parent
        # process gets False and simply falls out of this function.
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            try:
                _gc(None, srUuid, dryRun)
            except AbortException:
                Util.log("Aborted")
            except Exception:
                Util.logException("gc")
                Util.log("* * * * * SR %s: ERROR\n" % srUuid)
            # The daemonized child must never return into the caller's stack
            os._exit(0)
    else:
        _gc(session, srUuid, dryRun, immediate=True)
def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if lockRunning.acquireNoblock():
        # Nothing running: drop the probe lock and kick off a new GC below
        lockRunning.release()
    else:
        if not should_preempt(session, sr_uuid):
            util.SMlog("A GC instance already running, not kicking")
            return
        util.SMlog("Aborting currently-running coalesce of garbage VDI")
        try:
            if not abort(sr_uuid, soft=True):
                util.SMlog("The GC has already been scheduled to re-start")
        except util.CommandException as e:
            if e.code != errno.ETIMEDOUT:
                raise
            util.SMlog('failed to abort the GC')

    util.SMlog(f"Starting GC file is {__file__}")
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
def start_gc_service(sr_uuid, wait=False):
    """
    This starts the templated systemd service which runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called with
    wait=True this will run the service synchronously and will not return until the
    run has finished. This is used to force a run of the GC instead of just kicking it
    in the background.
    """
    # systemd instance names require '-' to be escaped as \x2d
    escaped_uuid = sr_uuid.replace("-", "\\x2d")
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    cmd = ["/usr/bin/systemctl", "--quiet"]
    if not wait:
        cmd.append("--no-block")
    cmd.extend(["start", f"SMGC@{escaped_uuid}"])
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # A GC/coalesce is running: signal it to abort.
        # NOTE(review): abort() releases lockGCActive after re-acquiring it,
        # so on this path we proceed (and later call release()) without
        # holding the lock — confirm this is the intended behaviour.
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are COW image errors")
    # 'force' makes the scan tolerate images with errors
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()
def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd for
    the state of the templated SMGC service and will return True if it is "activating"
    or "running" (for completeness, as in practice it will never achieve the latter state)
    """
    # systemd instance names require '-' to be escaped as \x2d
    escaped_uuid = srUuid.replace("-", "\\x2d")
    result = subprocess.run(
        ["/usr/bin/systemctl", "is-active", f"SMGC@{escaped_uuid}"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = result.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")
def should_preempt(session, srUuid):
    """Return True if the VDI recorded in the coalesce journal is garbage,
    i.e. the running coalesce is wasted work and may be aborted.

    Raises util.SMException if more than one coalesce journal entry exists.
    """
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if not entries:
        return False
    if len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())
def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Of the given VDI UUIDs, return the subset that is leaf-coalesceable.

    Raises util.SMException if any of the UUIDs is not found in the SR.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    coalesceable = []
    for vdi_uuid in vdiUuids:
        vdi = sr.getVDI(vdi_uuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % vdi_uuid)
        if vdi.isLeafCoalesceable():
            coalesceable.append(vdi_uuid)
    return coalesceable
def cache_cleanup(session, srUuid, maxAge):
    """Clean up IntelliCache files older than maxAge hours on the given SR."""
    return SR.getInstance(srUuid, session).cleanupCache(maxAge)
def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug command on a single LVM VDI.

    cmd is one of "activate", "deactivate", "inflate", "deflate" (see
    usage()). ** NEVER USE IT ON A LIVE VM **
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVMSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Fix: the UUID argument was missing from the format string,
        # printing the literal "%s" instead of the VDI UUID.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    # cmd holds a single value, so the checks are mutually exclusive
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    elif cmd == "deactivate":
        ns = NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    elif cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    elif cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)
def abort_optional_reenable(uuid):
    """Disable GC/coalesce for the SR until the operator presses enter
    (the CLI '-x/--disable' action)."""
    print("Disabling GC/coalesce for %s" % uuid)
    ret = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    # NOTE(review): lockGCRunning is released here although this function
    # does not visibly acquire it — verify that _abort/init leave it held.
    lockGCRunning.release()
    if ret:
        # _abort returned True holding lockGCActive; drop it now
        lockGCActive.release()
4312##############################################################################
4313#
4314# CLI
4315#
def main():
    """CLI entry point: parse command-line options and dispatch the action."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    # Each option matches at most one branch, so an elif chain is equivalent
    for o, a in opts:
        if o in ("-g", "--gc"):
            action = "gc"
        elif o in ("-G", "--gc_force"):
            action = "gc_force"
        elif o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        elif o in ("-a", "--abort"):
            action = "abort"
        elif o in ("-q", "--query"):
            action = "query"
        elif o in ("-x", "--disable"):
            action = "disable"
        elif o in ("-u", "--uuid"):
            uuid = a
        elif o in ("-b", "--background"):
            background = True
        elif o in ("-f", "--force"):
            force = True
        elif o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        elif o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        elif o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    # An action and an SR UUID are always required; debug additionally
    # requires (and is the only action allowing) a debug cmd and VDI UUID
    if not action or not uuid:
        usage()
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action not in ("query", "debug"):
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)
# Script entry point: run the CLI only when invoked directly.
if __name__ == '__main__':
    main()