Coverage for drivers/cleanup.py : 35%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/python3
2#
3# Copyright (C) Citrix Systems Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; version 2.1 only.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Script to coalesce and garbage collect VHD-based SR's in the background
19#
21from sm_typing import Optional, override
23import os
24import os.path
25import sys
26import time
27import signal
28import subprocess
29import getopt
30import datetime
31import traceback
32import base64
33import zlib
34import errno
35import stat
37import XenAPI # pylint: disable=import-error
38import util
39import lvutil
40import vhdutil
41import lvhdutil
42import lvmcache
43import journaler
44import fjournaler
45import lock
46import blktap2
47import xs_errors
48from refcounter import RefCounter
49from ipc import IPCFlag
50from lvmanager import LVActivator
51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
52from functools import reduce
53from time import monotonic as _time
55try:
56 from linstorjournaler import LinstorJournaler
57 from linstorvhdutil import LinstorVhdUtil
58 from linstorvolumemanager import get_controller_uri
59 from linstorvolumemanager import LinstorVolumeManager
60 from linstorvolumemanager import LinstorVolumeManagerError
61 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
63 LINSTOR_AVAILABLE = True
64except ImportError:
65 LINSTOR_AVAILABLE = False
# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# SR sm-config / other-config keys used to rate-limit coalesce error messages
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'

VAR_RUN = "/var/run/"
# per-SR file recording coalesce speed samples (formatted with the SR uuid)
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# presumably the window size for the coalesce-speed running average — confirm
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
SIGTERM = False  # set to True by receiveSignal() when SIGTERM arrives
class AbortException(util.SMException):
    """Raised when a GC/coalesce operation is interrupted by an abort request."""
    pass
def receiveSignal(signalNumber, frame):
    """SIGTERM handler: set the global SIGTERM flag so long-running loops
    (e.g. Util.runAbortable) can notice and bail out.

    signalNumber, frame: standard signal-handler arguments (unused here).
    """
    global SIGTERM

    # fix: log message previously misspelled "recieved"
    util.SMlog("GC: received SIGTERM")
    SIGTERM = True
################################################################################
#
#  Util
#
class Util:
    """Miscellaneous helpers: SMGC logging, subprocess execution, abortable
    child-process tasks and small bit/number utilities."""

    # 'ret' selectors for doexec()
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log under the SMGC identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the current exception with a prominent banner; *tag* labels
        where it happened. SystemExit is propagated as a clean exit."""
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        "Execute a subprocess, then return its return code, stdout, stderr"
        # NOTE(review): shell=True with a string command — callers pass
        # pre-built command lines (e.g. _runTapdiskDiff); do not feed
        # untrusted input into 'args'.
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        # NOTE(review): str() on bytes yields "b'...'" — kept as-is since
        # existing callers may depend on this formatting; confirm before
        # changing to .decode().
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if type(expectedRC) != type([]):
            expectedRC = [expectedRC]
        if not rc in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """execute func in a separate thread and kill it if abortTest signals
        so"""
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for child completion, abort request or timeout
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # always reap the child (non-blocking, bounded retries) so we
                # don't leave a zombie behind
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: own process group so the parent can SIGKILL the whole group
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occured")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Format *number* with the largest fitting G/M/K suffix
        (3 decimal places), or as a plain number below 1K."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Population count of the non-negative integer *val*."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenLong = len1
        lenShort = len2
        bitmapLong = bitmap1
        if len2 > len1:
            lenLong = len2
            lenShort = len1
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            val = bitmap1[i] | bitmap2[i]
            count += Util.numBits(val)

        # fix: was range(i + 1, lenLong), which raises NameError when the
        # shorter bitmap is empty (the first loop never binds i); the tail
        # of the longer bitmap always starts at lenShort.
        for i in range(lenShort, lenLong):
            val = bitmapLong[i]
            count += Util.numBits(val)
        return count

    @staticmethod
    def getThisScript():
        """Absolute path of this script (source file, not .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript
################################################################################
#
#  XAPI
#
class XAPI:
    """Thin wrapper around a XenAPI session, scoped to one SR: config access,
    topology queries and GC progress-task bookkeeping."""

    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # config "kinds": which XAPI VDI field a given config key lives in
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        pass

    @staticmethod
    def getSession():
        """Open a local root session tagged with the 'SM' originator."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Bind to SR *srUuid*; if *session* is None, open (and later close)
        a private session."""
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        # GC progress task state (see create_task/update_task)
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has an attached PBD on the local host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if this host is the one responsible for the SR: the pool
        master for shared SRs, the (single) attached host otherwise."""
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Call the on-slave 'multi' plugin on *hostRef* (used to deactivate
        volumes on other hosts)."""
        text = self.session.xenapi.host.call_plugin( \
                hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """VDI record for *uuid*, or None if XAPI does not know it."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an 'internal'-type snapshot of *vdi*."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the whole config field that *key* lives in (dict for
        sm/other-config, scalar for on-boot/allow-caching)."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Delete *key* from the VDI's sm-config or other-config."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add *key*=*val* to the VDI's sm-config or other-config."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Flag every SR with local caching enabled as dirty."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
                'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Start an async SR.update and wait up to ~60s for it to finish,
        cancelling it on abort request or timeout."""
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        # best-effort cancel; finally-clause will clean up
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
                self.session.xenapi.task.destroy(task)
                Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Refresh the GC task's applies_to field and progress fraction."""
        self.session.xenapi.task.set_other_config(
                self.task,
                {
                    "applies_to": self._srRef
                })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                    self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create the XAPI task used to report GC progress."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Record *key* ('coalescable' or 'done') and push the new progress."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)
################################################################################
#
#  VDI
#
class VDI(object):
    """Object representing a VDI of a VHD-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202
    DRIVER_NAME_VHD = "vhd"

    # config keys & values
    DB_VHD_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VHD_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each config key to the XAPI field ("kind") it is stored in
    CONFIG_TYPE = {
        DB_VHD_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VHD_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4
    def __init__(self, sr, uuid, raw):
        """Create a VDI record.

        sr:   owning SR object
        uuid: VDI uuid
        raw:  True if this is a raw (non-VHD) volume
        """
        self.sr = sr
        self.scanError = True  # presumably cleared by a successful scan/load — confirm
        self.uuid = uuid
        self.raw = raw
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1  # -1 == not yet known
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None
        self.children = []
        self._vdiRef = None
        self._clearRef()
570 @staticmethod
571 def extractUuid(path):
572 raise NotImplementedError("Implement in sub class")
574 def load(self, info=None) -> None:
575 """Load VDI info"""
576 pass
578 def getDriverName(self) -> str:
579 return self.DRIVER_NAME_VHD
581 def getRef(self):
582 if self._vdiRef is None:
583 self._vdiRef = self.sr.xapi.getRefVDI(self)
584 return self._vdiRef
586 def getConfig(self, key, default=None):
587 config = self.sr.xapi.getConfigVDI(self, key)
588 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 588 ↛ 589line 588 didn't jump to line 589, because the condition on line 588 was never true
589 val = config
590 else:
591 val = config.get(key)
592 if val:
593 return val
594 return default
596 def setConfig(self, key, val):
597 self.sr.xapi.removeFromConfigVDI(self, key)
598 self.sr.xapi.addToConfigVDI(self, key, val)
599 Util.log("Set %s = %s for %s" % (key, val, self))
601 def delConfig(self, key):
602 self.sr.xapi.removeFromConfigVDI(self, key)
603 Util.log("Removed %s from %s" % (key, self))
605 def ensureUnpaused(self):
606 if self.getConfig(self.DB_VDI_PAUSED) == "true":
607 Util.log("Unpausing VDI %s" % self)
608 self.unpause()
610 def pause(self, failfast=False) -> None:
611 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
612 self.uuid, failfast):
613 raise util.SMException("Failed to pause VDI %s" % self)
    def _report_tapdisk_unpause_error(self):
        """Best-effort XenCenter message when a tapdisk unpause fails:
        guests using the tapdisk have lost access to their disk(s)."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            # reporting is best-effort; never let it mask the original error
            util.SMlog("failed to generate message: %s" % e)
627 def unpause(self):
628 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
629 self.uuid):
630 self._report_tapdisk_unpause_error()
631 raise util.SMException("Failed to unpause VDI %s" % self)
    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                        self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # a VDI that vanished mid-refresh is fine unless told otherwise
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()
650 def isSnapshot(self):
651 return self.sr.xapi.isSnapshot(self)
653 def isAttachedRW(self):
654 return util.is_attached_rw(
655 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
657 def getVHDBlocks(self):
658 val = self.updateBlockInfo()
659 bitmap = zlib.decompress(base64.b64decode(val))
660 return bitmap
662 def isCoalesceable(self):
663 """A VDI is coalesceable if it has no siblings and is not a leaf"""
664 return not self.scanError and \
665 self.parent and \
666 len(self.parent.children) == 1 and \
667 self.isHidden() and \
668 len(self.children) > 0
670 def isLeafCoalesceable(self):
671 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
672 return not self.scanError and \
673 self.parent and \
674 len(self.parent.children) == 1 and \
675 not self.isHidden() and \
676 len(self.children) == 0
678 def canLiveCoalesce(self, speed):
679 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
680 isLeafCoalesceable() already"""
681 feasibleSize = False
682 allowedDownTime = \
683 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
684 vhd_size = self.getAllocatedSize()
685 if speed:
686 feasibleSize = \
687 vhd_size // speed < allowedDownTime
688 else:
689 feasibleSize = \
690 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
692 return (feasibleSize or
693 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
    def getAllPrunable(self):
        """Return the VDIs in this subtree that are safe to garbage collect."""
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            # if any child survives, this node must survive too
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all childs are hidden BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436] b5458d61(1.000G/4.127M)
        # SMGC: [32436] *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList
731 def getSizeVHD(self) -> int:
732 return self._sizeVHD
734 def getAllocatedSize(self) -> int:
735 return self._sizeAllocated
737 def getTreeRoot(self):
738 "Get the root of the tree that self belongs to"
739 root = self
740 while root.parent:
741 root = root.parent
742 return root
744 def getTreeHeight(self):
745 "Get the height of the subtree rooted at self"
746 if len(self.children) == 0:
747 return 1
749 maxChildHeight = 0
750 for child in self.children:
751 childHeight = child.getTreeHeight()
752 if childHeight > maxChildHeight:
753 maxChildHeight = childHeight
755 return maxChildHeight + 1
757 def getAllLeaves(self):
758 "Get all leaf nodes in the subtree rooted at self"
759 if len(self.children) == 0:
760 return [self]
762 leaves = []
763 for child in self.children:
764 leaves.extend(child.getAllLeaves())
765 return leaves
767 def updateBlockInfo(self) -> Optional[str]:
768 val = base64.b64encode(self._queryVHDBlocks()).decode()
769 self.setConfig(VDI.DB_VHD_BLOCKS, val)
770 return val
    def rename(self, uuid) -> None:
        """Rename the VDI file: re-key this object under *uuid* in sr.vdis."""
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self
    def delete(self) -> None:
        "Physically delete the VDI"
        # drop all per-VDI locks before clearing our state
        lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()
789 def getParent(self) -> str:
790 return vhdutil.getParent(self.path, lambda x: x.strip()) 790 ↛ exitline 790 didn't run the lambda on line 790
792 def repair(self, parent) -> None:
793 vhdutil.repair(parent)
    @override
    def __str__(self) -> str:
        """Compact one-line summary: [*]uuid8(virt/vhd/alloc)[RAW];
        '*' marks hidden, '?' marks unknown sizes."""
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizeVHD = "?"
        if self._sizeVHD > 0:
            strSizeVHD = "/%s" % Util.num2str(self._sizeVHD)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = ""
        if self.raw:
            # raw volumes have no VHD size component
            strType = "[RAW]"
            strSizeVHD = ""

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                strSizeVHD, strSizeAllocated, strType)
817 def validate(self, fast=False) -> None:
818 if not vhdutil.check(self.path, fast=fast): 818 ↛ 819line 818 didn't jump to line 819, because the condition on line 818 was never true
819 raise util.SMException("VHD %s corrupted" % self)
821 def _clear(self):
822 self.uuid = ""
823 self.path = ""
824 self.parentUuid = ""
825 self.parent = None
826 self._clearRef()
828 def _clearRef(self):
829 self._vdiRef = None
    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        VHD, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # the parent must be able to hold all of our virtual size
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceVHD(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()
    def _verifyContents(self, timeOut):
        """Verify the coalesce result with tapdisk-diff, abortable and
        limited to *timeOut* seconds."""
        Util.log(" Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log(" Coalesce verification succeeded")
    def _runTapdiskDiff(self):
        """Diff this VHD against its parent via tapdisk-diff; True on success
        (doexec raises on a non-zero exit)."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True
    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occurred
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            # zero rate == throttling disabled: always message
            xcmsg = True
        elif coalesce_err_rate > 0:
            # negative rate disables reporting entirely
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                # remember when we last messaged, for the next throttle check
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
913 def coalesce(self) -> int:
914 # size is returned in sectors
915 return vhdutil.coalesce(self.path) * 512
    @staticmethod
    def _doCoalesceVHD(vdi):
        """Worker run in the forked child process: perform the coalesce,
        record the achieved speed, and report failures to XenCenter."""
        try:
            startTime = time.time()
            # NOTE(review): vhdSize is captured but unused — confirm intent
            vhdSize = vdi.getAllocatedSize()
            coalesced_size = vdi.coalesce()
            endTime = time.time()
            vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
        except util.CommandException as ce:
            # We use try/except for the following piece of code because it runs
            # in a separate process context and errors will not be caught and
            # reported by anyone.
            try:
                # Report coalesce errors back to user via XC
                VDI._reportCoalesceError(vdi, ce)
            except Exception as e:
                util.SMlog('failed to create XenCenter message: %s' % e)
            raise ce
        except:
            raise
938 def _vdi_is_raw(self, vdi_path):
939 """
940 Given path to vdi determine if it is raw
941 """
942 uuid = self.extractUuid(vdi_path)
943 return self.sr.vdis[uuid].raw
    def _coalesceVHD(self, timeOut):
        """Run the VHD coalesce in an abortable child process; on any failure
        attempt a repair of the parent before re-raising."""
        Util.log(" Running VHD coalesce on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        try:
            # test hook: optionally inject a failure here
            util.fistpoint.activate_custom_fn(
                    "cleanup_coalesceVHD_inject_failure",
                    util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
                    self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            #exception at this phase could indicate a failure in vhd coalesce
            # or a kill of vhd coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                            'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                        'after failed coalesce on %s, err: %s' %
                        (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to the parent of this VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            # honour an abort request between each child
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log(" Relinking %s from %s to %s" % \
                    (child, self, self.parent))
            # test hook for exercising failures during relink
            util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
            child._setParent(self.parent)
        self.children = []
    def _reloadChildren(self, vdiSkip):
        """Pause & unpause all VDIs in the subtree to cause blktap to reload
        the VHD metadata for this file in any online VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if child == vdiSkip:
                continue
            # honour an abort request between each child
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log(" Reloading VDI %s" % child)
            child._reload()
    def _reload(self):
        """Pause & unpause to cause blktap to reload the VHD metadata"""
        # depth-first: reload descendants before this node
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                # the VDI may legitimately have disappeared in the meantime
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()
    def _tagChildrenForRelink(self):
        """Tag every leaf VDI in this subtree with DB_VDI_RELINKING.

        Tagging races with VDI activation (DB_VDI_ACTIVATING): retry up to
        15 times, 2s apart, while the VDI reports it is activating, and back
        the tag out again if activation started while we were setting it.
        Raises util.SMException if the tag could not be placed.
        """
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        # activation in progress -- wait and retry
                        Util.log("VDI %s is activating, wait to relink" %
                                 self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        # re-check: activation may have begun in the window
                        # between the test above and setting our tag
                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                     self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                # a VDI deleted under us cannot (and need not) be tagged
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()
1040 def _loadInfoParent(self):
1041 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid)
1042 if ret:
1043 self.parentUuid = ret
1045 def _setParent(self, parent) -> None:
1046 vhdutil.setParent(self.path, parent.path, False)
1047 self.parent = parent
1048 self.parentUuid = parent.uuid
1049 parent.children.append(self)
1050 try:
1051 self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
1052 Util.log("Updated the vhd-parent field for child %s with %s" % \
1053 (self.uuid, self.parentUuid))
1054 except:
1055 Util.log("Failed to update %s with vhd-parent field %s" % \
1056 (self.uuid, self.parentUuid))
1058 def isHidden(self) -> bool:
1059 if self._hidden is None: 1059 ↛ 1060line 1059 didn't jump to line 1060, because the condition on line 1059 was never true
1060 self._loadInfoHidden()
1061 return self._hidden
1063 def _loadInfoHidden(self) -> None:
1064 hidden = vhdutil.getHidden(self.path)
1065 self._hidden = (hidden != 0)
    def _setHidden(self, hidden=True) -> None:
        """Persist the hidden flag into the VHD header and update the cache."""
        # invalidate the cache first so a failed write leaves it unset
        self._hidden = None
        vhdutil.setHidden(self.path, hidden)
        self._hidden = hidden
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a VHD must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024
        if (size <= msize):
            # fast path: fits within the current VHD metadata, no pause needed
            vhdutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                # pause the entire subtree so nothing holds the VHD open
                # while the offline-only resize runs
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                # caller already established the paused/atomic context
                self._setSizeVirt(size)

        # re-read the authoritative value from the VHD itself
        self.sizeVirt = vhdutil.getSizeVirt(self.path)
1104 def _setSizeVirt(self, size) -> None:
1105 """WARNING: do not call this method directly unless all VDIs in the
1106 subtree are guaranteed to be unplugged (and remain so for the duration
1107 of the operation): this operation is only safe for offline VHDs"""
1108 jFile = os.path.join(self.sr.path, self.uuid)
1109 vhdutil.setSizeVirt(self.path, size, jFile)
    def _queryVHDBlocks(self) -> bytes:
        """Return the raw VHD block-allocation bitmap of this VDI's file."""
        return vhdutil.getBlockBitmap(self.path)
1114 def _getCoalescedSizeData(self):
1115 """Get the data size of the resulting VHD if we coalesce self onto
1116 parent. We calculate the actual size by using the VHD block allocation
1117 information (as opposed to just adding up the two VHD sizes to get an
1118 upper bound)"""
1119 # make sure we don't use stale BAT info from vdi_rec since the child
1120 # was writable all this time
1121 self.delConfig(VDI.DB_VHD_BLOCKS)
1122 blocksChild = self.getVHDBlocks()
1123 blocksParent = self.parent.getVHDBlocks()
1124 numBlocks = Util.countBits(blocksChild, blocksParent)
1125 Util.log("Num combined blocks = %d" % numBlocks)
1126 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE
1127 assert(sizeData <= self.sizeVirt)
1128 return sizeData
1130 def _calcExtraSpaceForCoalescing(self) -> int:
1131 sizeData = self._getCoalescedSizeData()
1132 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \
1133 vhdutil.calcOverheadEmpty(self.sizeVirt)
1134 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1135 return sizeCoalesced - self.parent.getSizeVHD()
1137 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1138 """How much extra space in the SR will be required to
1139 [live-]leaf-coalesce this VDI"""
1140 # the space requirements are the same as for inline coalesce
1141 return self._calcExtraSpaceForCoalescing()
1143 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1144 """How much extra space in the SR will be required to
1145 snapshot-coalesce this VDI"""
1146 return self._calcExtraSpaceForCoalescing() + \
1147 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
1149 def _getAllSubtree(self):
1150 """Get self and all VDIs in the subtree of self as a flat list"""
1151 vdiList = [self]
1152 for child in self.children:
1153 vdiList.extend(child._getAllSubtree())
1154 return vdiList
class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        """Derive the VDI UUID from a .vhd/.raw file path, or None."""
        name = os.path.basename(path.strip())
        if not name.endswith((vhdutil.FILE_EXTN_VHD, vhdutil.FILE_EXTN_RAW)):
            return None
        # TODO: validate UUID format
        return name.replace(vhdutil.FILE_EXTN_VHD, "").replace(
                vhdutil.FILE_EXTN_RAW, "")

    def __init__(self, sr, uuid, raw):
        VDI.__init__(self, sr, uuid, raw)
        extn = vhdutil.FILE_EXTN_RAW if self.raw else vhdutil.FILE_EXTN_VHD
        self.fileName = "%s%s" % (self.uuid, extn)

    @override
    def load(self, info=None) -> None:
        """(Re)load the VHD metadata, scanning the file when no batch-scan
        'info' record was supplied."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = vhdutil.getVHDInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        # NOTE(review): path always gets the .vhd extension here, even for a
        # raw VDI -- presumably only VHD files reach this point; confirm
        self.path = os.path.join(self.sr.path,
                                 "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI and move its backing file accordingly."""
        prevPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (prevPath, self.path))
        os.rename(prevPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI; children must be gone."""
        if self.children:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        """Lazily fetch and cache the allocated (block-level) size."""
        if self._sizeAllocated == -1:
            self._sizeAllocated = vhdutil.getAllocatedSize(self.path)
        return self._sizeAllocated
class LVHDVDI(VDI):
    """Object representing a VDI in an LVHD SR"""

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent
    DRIVER_NAME_RAW = "aio"

    @override
    def load(self, info=None) -> None:
        """Populate this VDI from a batch-scan 'info' record."""
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVHDVDI.load"
        self.parent = None
        self.children = []
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)

    @override
    @staticmethod
    def extractUuid(path):
        """Derive the VDI UUID from an LV path."""
        return lvhdutil.extractUuid(path)

    @override
    def getDriverName(self) -> str:
        """Blktap driver name: 'aio' for raw LVs, the VHD driver otherwise."""
        if self.raw:
            return self.DRIVER_NAME_RAW
        return self.DRIVER_NAME_VHD

    def inflate(self, size):
        """inflate the LV containing the VHD to 'size'"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # cached sizes are now stale: re-read the LV size, refetch the rest lazily
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the VHD to minimum"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD())
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the LV to the maximum size the VHD could ever need."""
        self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        """Raw LVs have no VHD block bitmap to record."""
        if not self.raw:
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI: rename the LV and migrate activation/refcounts."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid
        if self.raw:
            self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # carry the refcount over to the new uuid and drop the old one
        ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV, forget the VDI in xapi, and clear its refcounts."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizeVHD(self) -> int:
        """Lazily fetch and cache the physical VHD utilization."""
        if self._sizeVHD == -1:
            self._loadInfoSizeVHD()
        return self._sizeVHD

    def _loadInfoSizeVHD(self):
        """Get the physical utilization of the VHD file. We do it individually
        (and not using the VHD batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if self.raw:
            return
        self._activate()
        self._sizeVHD = vhdutil.getSizePhys(self.path)
        if self._sizeVHD <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizeVHD))

    @override
    def getAllocatedSize(self) -> int:
        """Lazily fetch and cache the allocated size of the VHD."""
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the VHD volume.
        """
        if self.raw:
            return
        self._activate()
        self._sizeAllocated = vhdutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        """Raw LVs keep the hidden flag in LVM metadata, VHDs in the header."""
        if self.raw:
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        """Persist the hidden flag (LVM metadata for raw, VHD header otherwise)."""
        if self.raw:
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        """Compact one-line rendering: hidden-marker, uuid prefix, type,
        virt/phys/allocated/LV sizes and activation state."""
        strType = "VHD"
        if self.raw:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVHD = ""
        if self._sizeVHD > 0:
            strSizeVHD = Util.num2str(self._sizeVHD)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        """VHD integrity check; raw LVs have nothing to validate."""
        if not self.raw:
            VDI.validate(self, fast)

    @override
    def _doCoalesce(self) -> None:
        """LVHD parents must first be activated, inflated, and made writable"""
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # always shrink the parent back and restore read-only, even on error
            self.parent._loadInfoSizeVHD()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)

    @override
    def _setParent(self, parent) -> None:
        """Rewrite the VHD parent pointer, temporarily lifting read-only."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            vhdutil.setParent(self.path, parent.path, parent.raw)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except Exception:
            # narrowed from a bare 'except:': keep the config update
            # best-effort without swallowing SystemExit/KeyboardInterrupt
            Util.log("Failed to update the vhd-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        """Activate this VDI's LV (refcounted via the SR's LVActivator)."""
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        """Activate the LVs of self and every ancestor."""
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        """Drop our activation reference on this VDI's LV."""
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if not self.raw:
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case: grow the LV and zero out the newly added tail,
        # journaling the zeroing so it can be redone if we are interrupted
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            # a previous zeroing run was interrupted -- restart at its offset
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline VHDs"""
        self._activate()
        jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid,
                vhdutil.MAX_VHD_JOURNAL_SIZE)
        try:
            lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid,
                    size, jFile)
        finally:
            lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)

    @override
    def _queryVHDBlocks(self) -> bytes:
        """The LV must be active before its bitmap can be read."""
        self._activate()
        return VDI._queryVHDBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        """Extra SR space required to coalesce this VDI into its parent."""
        if self.parent.raw:
            return 0  # raw parents are never deflated in the first place
        sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """Extra space for snapshot-coalesce: inline cost plus a leaf copy."""
        return self._calcExtraSpaceForCoalescing() + \
                lvhdutil.calcSizeLV(self.getSizeVHD())
class LinstorVDI(VDI):
    """Object representing a VDI in a LINSTOR SR"""

    VOLUME_LOCK_TIMEOUT = 30

    @override
    def load(self, info=None) -> None:
        """Populate this VDI, scanning the VHD metadata when 'info' is absent."""
        self.scanError = True
        self.parent = None
        self.children = []
        # BUGFIX: previously `info.parentUuid` was read unconditionally before
        # the `if not info:` fallback below, raising AttributeError whenever
        # load() was called without an info record
        if info:
            self.parentUuid = info.parentUuid

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)

        if not info:
            try:
                info = self.sr._vhdutil.get_vhd_info(self.uuid)
            except util.SMException:
                # leave scanError = True so callers know the scan failed
                Util.log(
                    ' [VDI {}: failed to read VHD metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False
        self.vdi_type = vhdutil.VDI_TYPE_VHD

    @override
    def getSizeVHD(self, fetch=False) -> int:
        """Cached physical VHD size; 'fetch' forces a re-read."""
        if self._sizeVHD < 0 or fetch:
            self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid)
        return self._sizeVHD

    def getDrbdSize(self, fetch=False):
        """Cached DRBD volume size; 'fetch' forces a re-read."""
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid)
        return self.drbd_size

    @override
    def getAllocatedSize(self) -> int:
        """Lazily fetch and cache the allocated size (VHD volumes only)."""
        if self._sizeAllocated == -1:
            if not self.raw:
                self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

    def inflate(self, size):
        """Grow the DRBD volume under the VHD to 'size'."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # invalidate cached sizes; they will be refetched on demand
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """Shrink the DRBD volume down to what the VHD actually uses."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizeVHD = self.getSizeVHD(fetch=True)
            self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate to the maximum size the VHD could ever need."""
        if not self.raw:
            self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI by re-labelling the LINSTOR volume."""
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI in xapi."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

    @override
    def validate(self, fast=False) -> None:
        """VHD integrity check; raises util.SMException on corruption."""
        if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast):
            raise util.SMException('VHD {} corrupted'.format(self))

    @override
    def pause(self, failfast=False) -> None:
        """Pause the VDI, first waiting for any LINSTOR volume lock."""
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

    @override
    def coalesce(self) -> int:
        """Coalesce this VHD into its parent; returns bytes coalesced."""
        # Note: We raise `SMException` here to skip the current coalesce in case of failure.
        # Using another exception we can't execute the next coalesce calls.
        return self.sr._vhdutil.force_coalesce(self.path) * 512

    @override
    def getParent(self) -> str:
        """Read the parent UUID from the VHD on the LINSTOR volume."""
        return self.sr._vhdutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

    @override
    def repair(self, parent_uuid) -> None:
        """Run a forced VHD repair on the given parent volume."""
        self.sr._vhdutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

    @override
    def _relinkSkip(self) -> None:
        """Relink children to this VDI's parent, pausing each child's tapdisk
        around the parent-pointer rewrite."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

    @override
    def _setParent(self, parent) -> None:
        """Force the VHD parent pointer and record the link in our config."""
        self.sr._linstor.get_device_path(self.uuid)
        self.sr._vhdutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except Exception:
            # narrowed from a bare 'except:': the config update stays
            # best-effort without swallowing SystemExit/KeyboardInterrupt
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

    @override
    def _doCoalesce(self) -> None:
        """Activate the chain and pre-inflate the parent before coalescing."""
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # always deflate the parent back, even if the coalesce failed
            self.parent.deflate()

    def _activateChain(self):
        """Ensure a local DRBD device exists for self and every ancestor."""
        vdi = self
        while vdi:
            try:
                # called for its side effect (device activation); the
                # returned path is not needed here
                self.sr._linstor.get_device_path(vdi.uuid)
            except Exception as e:
                # Use SMException to skip coalesce.
                # Otherwise the GC is stopped...
                raise util.SMException(str(e)) from e
            vdi = vdi.parent

    @override
    def _setHidden(self, hidden=True) -> None:
        """Persist the hidden flag (volume metadata for raw, VHD otherwise)."""
        HIDDEN_TAG = 'hidden'

        if self.raw:
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """Ensure the virtual size of 'self' is at least 'size' (see base)."""
        if self.raw:
            # raw volume: grow the DRBD device and zero the new tail,
            # journaling the zeroing so it can be redone if interrupted
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            # fast path: fits within the current VHD metadata
            self.sr._vhdutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                # pause the whole subtree while the offline-only resize runs
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # re-read the authoritative value
        self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """Resize the VHD using a temporary journal volume (offline only)."""
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile
        )
        try:
            self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type))
            self.sr._vhdutil.set_size_virt(size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

    @override
    def _queryVHDBlocks(self) -> bytes:
        """Return the VHD block bitmap via the LINSTOR-aware vhdutil."""
        return self.sr._vhdutil.get_block_bitmap(self.uuid)

    def _inflateParentForCoalesce(self):
        """Inflate the parent just enough to receive this VDI's data."""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            self.parent.inflate(self.parent.getDrbdSize() + inc)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        """Extra SR space required to coalesce this VDI into its parent."""
        if self.parent.raw:
            return 0
        size_coalesced = LinstorVhdUtil.compute_volume_size(
            self._getCoalescedSizeData(), self.vdi_type
        )
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """Inline-coalesce cost minus what deflating the leaf gives back."""
        assert self.getDrbdSize() > 0
        assert self.getSizeVHD() > 0
        deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
        assert deflate_diff >= 0
        return self._calcExtraSpaceForCoalescing() - deflate_diff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """Inline-coalesce cost plus room for a full copy of the leaf."""
        assert self.getSizeVHD() > 0
        return self._calcExtraSpaceForCoalescing() + \
                LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
1873################################################################################
1874#
1875# SR
1876#
1877class SR(object):
1878 class LogFilter:
1879 def __init__(self, sr):
1880 self.sr = sr
1881 self.stateLogged = False
1882 self.prevState = {}
1883 self.currState = {}
1885 def logState(self):
1886 changes = ""
1887 self.currState.clear()
1888 for vdi in self.sr.vdiTrees:
1889 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1890 if not self.prevState.get(vdi.uuid) or \
1891 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1892 changes += self.currState[vdi.uuid]
1894 for uuid in self.prevState:
1895 if not self.currState.get(uuid):
1896 changes += "Tree %s gone\n" % uuid
1898 result = "SR %s (%d VDIs in %d VHD trees): " % \
1899 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1901 if len(changes) > 0:
1902 if self.stateLogged:
1903 result += "showing only VHD trees that changed:"
1904 result += "\n%s" % changes
1905 else:
1906 result += "no changes"
1908 for line in result.split("\n"):
1909 Util.log("%s" % line)
1910 self.prevState.clear()
1911 for key, val in self.currState.items():
1912 self.prevState[key] = val
1913 self.stateLogged = True
1915 def logNewVDI(self, uuid):
1916 if self.stateLogged:
1917 Util.log("Found new VDI when scanning: %s" % uuid)
1919 def _getTreeStr(self, vdi, indent=8):
1920 treeStr = "%s%s\n" % (" " * indent, vdi)
1921 for child in vdi.children:
1922 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
1923 return treeStr
    # supported SR backend types (dispatched on in getInstance)
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # lock retry tuning: interval in seconds and attempt counts
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"

    # other-config keys controlling offline leaf-coalesce behavior
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
1942 @staticmethod
1943 def getInstance(uuid, xapiSession, createLock=True, force=False):
1944 xapi = XAPI(xapiSession, uuid)
1945 type = normalizeType(xapi.srRecord["type"])
1946 if type == SR.TYPE_FILE:
1947 return FileSR(uuid, xapi, createLock, force)
1948 elif type == SR.TYPE_LVHD:
1949 return LVHDSR(uuid, xapi, createLock, force)
1950 elif type == SR.TYPE_LINSTOR:
1951 return LinstorSR(uuid, xapi, createLock, force)
1952 raise util.SMException("SR type %s not recognized" % type)
    def __init__(self, uuid, xapi, createLock, force):
        """Bind to an SR and verify it is usable from this host.

        createLock: take a per-SR lock object (skipped by some callers).
        force: skip the plugged-here and pool-master checks.
        Raises util.SMException if the SR never attaches here or if this
        host is not the pool master (unless force is set).
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}          # uuid -> VDI object, filled by scan
        self.vdiTrees = []      # roots of the VHD trees
        self.journaler = None
        self.xapi = xapi
        self._locked = 0
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # the PBD may still be plugging in -- poll before giving up
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        # VDIs that could not be coalesced for lack of space in this run
        self.no_space_candidates = {}
1986 def msg_cleared(self, xapi_session, msg_ref):
1987 try:
1988 msg = xapi_session.xenapi.message.get_record(msg_ref)
1989 except XenAPI.Failure:
1990 return True
1992 return msg is None
    def check_no_space_candidates(self):
        """Raise (or clear) the SM_GC_NO_SPACE XAPI alert for this SR.

        If any VDI could not be coalesced for lack of space, create the
        alert message once (tracked in sm_config) and tag every affected
        VDI; if everything was coalesceable, destroy a stale alert.
        """
        xapi_session = self.xapi.getSession()

        # message id previously recorded in the SR's sm_config, if any
        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # only (re)create the message if none exists or it was cleared
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # replace the recorded id with the fresh one
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            # tag each starved VDI so the id can be cleared per-VDI later
            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.message.destroy(msg_id)
2021 def clear_no_space_msg(self, vdi):
2022 msg_id = None
2023 try:
2024 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2025 except XenAPI.Failure:
2026 pass
2028 self.no_space_candidates.pop(vdi.uuid, None)
2029 if msg_id is not None: 2029 ↛ exitline 2029 didn't return from function 'clear_no_space_msg', because the condition on line 2029 was never false
2030 vdi.delConfig(VDI.DB_GC_NO_SPACE)
    def wait_for_plug(self):
        """Poll until this SR's PBD is plugged on this host.

        Returns True as soon as the SR is attached, False after the
        attempts are exhausted.  NOTE: range(1, 10) gives 9 attempts of
        2s each, i.e. up to ~18s total — presumably "about 10 tries".
        """
        for _ in range(1, 10):
            time.sleep(2)
            if self.xapi.isPluggedHere():
                return True
        return False
2040 def gcEnabled(self, refresh=True):
2041 if refresh:
2042 self.xapi.srRecord = \
2043 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2044 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2045 Util.log("GC is disabled for this SR, abort")
2046 return False
2047 return True
2049 def scan(self, force=False) -> None:
2050 """Scan the SR and load VDI info for each VDI. If called repeatedly,
2051 update VDI objects if they already exist"""
2052 pass
2054 def scanLocked(self, force=False):
2055 self.lock()
2056 try:
2057 self.scan(force)
2058 finally:
2059 self.unlock()
2061 def getVDI(self, uuid):
2062 return self.vdis.get(uuid)
2064 def hasWork(self):
2065 if len(self.findGarbage()) > 0:
2066 return True
2067 if self.findCoalesceable():
2068 return True
2069 if self.findLeafCoalesceable():
2070 return True
2071 if self.needUpdateBlockInfo():
2072 return True
2073 return False
    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            # NOTE: returns the empty list (falsy) rather than None here
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()  # height -> list of candidates at that height
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        for h in heights:
            for c in treeHeight[h]:
                # first candidate (tallest tree) that fits in free space wins
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    self.clear_no_space_msg(c)
                    return c
                else:
                    # remember it so check_no_space_candidates() can alert
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                            (c, freeSpace))
        return None
2126 def getSwitch(self, key):
2127 return self.xapi.srRecord["other_config"].get(key)
2129 def forbiddenBySwitch(self, switch, condition, fail_msg):
2130 srSwitch = self.getSwitch(switch)
2131 ret = False
2132 if srSwitch:
2133 ret = srSwitch == condition
2135 if ret:
2136 Util.log(fail_msg)
2138 return ret
2140 def leafCoalesceForbidden(self):
2141 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2142 "false",
2143 "Coalesce disabled for this SR") or
2144 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2145 VDI.LEAFCLSC_DISABLED,
2146 "Leaf-coalesce disabled for this SR"))
    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each VHD tree"""

        candidates = []
        if self.leafCoalesceForbidden():
            # NOTE: returns the empty list (falsy) rather than None here
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # snapshot-coalesce won't fit; a live leaf-coalesce needs
                # less space, so consider that if the VDI qualifies
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    # mark the VDI for offline coalesce, which fits
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                # remember it so check_no_space_candidates() can alert
                self.no_space_candidates[candidate.uuid] = candidate

        return None
2185 def gatherLeafCoalesceable(self, candidates):
2186 for vdi in self.vdis.values():
2187 if not vdi.isLeafCoalesceable():
2188 continue
2189 if vdi in self._failedCoalesceTargets:
2190 continue
2191 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2192 Util.log("Skipping reset-on-boot %s" % vdi)
2193 continue
2194 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2195 Util.log("Skipping allow_caching=true %s" % vdi)
2196 continue
2197 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2198 Util.log("Leaf-coalesce disabled for %s" % vdi)
2199 continue
2200 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2201 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2202 continue
2203 candidates.append(vdi)
2205 def coalesce(self, vdi, dryRun=False):
2206 """Coalesce vdi onto parent"""
2207 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2208 if dryRun: 2208 ↛ 2209line 2208 didn't jump to line 2209, because the condition on line 2208 was never true
2209 return
2211 try:
2212 self._coalesce(vdi)
2213 except util.SMException as e:
2214 if isinstance(e, AbortException): 2214 ↛ 2215line 2214 didn't jump to line 2215, because the condition on line 2214 was never true
2215 self.cleanup()
2216 raise
2217 else:
2218 self._failedCoalesceTargets.append(vdi)
2219 Util.logException("coalesce")
2220 Util.log("Coalesce failed, skipping")
2221 self.cleanup()
    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent"""
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # re-fetch by uuid: the original object may have been
                # replaced/renamed during the coalesce
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            # non-abort failure: blacklist the VDI for this run and move on
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
            self.cleanup()
2247 def garbageCollect(self, dryRun=False):
2248 vdiList = self.findGarbage()
2249 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2250 for vdi in vdiList:
2251 Util.log(" %s" % vdi)
2252 if not dryRun:
2253 self.deleteVDIs(vdiList)
2254 self.cleanupJournals(dryRun)
2256 def findGarbage(self):
2257 vdiList = []
2258 for vdi in self.vdiTrees:
2259 vdiList.extend(vdi.getAllPrunable())
2260 return vdiList
2262 def deleteVDIs(self, vdiList) -> None:
2263 for vdi in vdiList:
2264 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2265 raise AbortException("Aborting due to signal")
2266 Util.log("Deleting unlinked VDI %s" % vdi)
2267 self.deleteVDI(vdi)
2269 def deleteVDI(self, vdi) -> None:
2270 assert(len(vdi.children) == 0)
2271 del self.vdis[vdi.uuid]
2272 if vdi.parent: 2272 ↛ 2274line 2272 didn't jump to line 2274, because the condition on line 2272 was never false
2273 vdi.parent.children.remove(vdi)
2274 if vdi in self.vdiTrees: 2274 ↛ 2275line 2274 didn't jump to line 2275, because the condition on line 2274 was never true
2275 self.vdiTrees.remove(vdi)
2276 vdi.delete()
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI record via the XAPI wrapper (record only; the
        on-disk data is not touched here)."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)
2281 def pauseVDIs(self, vdiList) -> None:
2282 paused = []
2283 failed = False
2284 for vdi in vdiList:
2285 try:
2286 vdi.pause()
2287 paused.append(vdi)
2288 except:
2289 Util.logException("pauseVDIs")
2290 failed = True
2291 break
2293 if failed:
2294 self.unpauseVDIs(paused)
2295 raise util.SMException("Failed to pause VDIs")
2297 def unpauseVDIs(self, vdiList):
2298 failed = False
2299 for vdi in vdiList:
2300 try:
2301 vdi.unpause()
2302 except:
2303 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2304 failed = True
2305 if failed:
2306 raise util.SMException("Failed to unpause VDIs")
2308 def getFreeSpace(self) -> int:
2309 return 0
    def cleanup(self):
        """Post-operation cleanup hook; base implementation only logs.
        Backends override to release temporary resources."""
        Util.log("In cleanup")
        return
2315 @override
2316 def __str__(self) -> str:
2317 if self.name:
2318 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2319 else:
2320 ret = "%s" % self.uuid
2321 return ret
    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # locking disabled for this SR instance (createLock=False)
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # poll-acquire so the abort flag can be checked between attempts
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # already held by us: just bump the nesting depth
        self._locked += 1
2343 def unlock(self):
2344 if not self._srLock: 2344 ↛ 2346line 2344 didn't jump to line 2346, because the condition on line 2344 was never false
2345 return
2346 assert(self._locked > 0)
2347 self._locked -= 1
2348 if self._locked == 0:
2349 self._srLock.release()
2351 def needUpdateBlockInfo(self) -> bool:
2352 for vdi in self.vdis.values():
2353 if vdi.scanError or len(vdi.children) == 0:
2354 continue
2355 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2356 return True
2357 return False
2359 def updateBlockInfo(self) -> None:
2360 for vdi in self.vdis.values():
2361 if vdi.scanError or len(vdi.children) == 0:
2362 continue
2363 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2364 vdi.updateBlockInfo()
2366 def cleanupCoalesceJournals(self):
2367 """Remove stale coalesce VDI indicators"""
2368 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2369 for uuid, jval in entries.items():
2370 self.journaler.remove(VDI.JRN_COALESCE, uuid)
    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    # VDI still exists: the entry is not stale
                    continue
                if t == SR.JRN_CLONE:
                    # clone entries are keyed "<base>_<clone>": keep the
                    # entry while the base VDI is still present
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log("  Deleting stale '%s' journal entry for %s "
                         "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)
2388 def cleanupCache(self, maxAge=-1) -> int:
2389 return 0
    def _coalesce(self, vdi):
        """Perform the VHD coalesce of vdi into its parent, then relink
        vdi's children to the parent and delete vdi.

        Journal entries make each phase restartable: JRN_COALESCE marks the
        data-copy phase, JRN_RELINK marks the relink phase.
        """
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the VHD coalesce step is done, because we
            # don't expect the rest of the process to take long
            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            vdi._doCoalesce()
            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)

            util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

            # we now need to relink the children: lock the SR to prevent ops
            # like SM.clone from manipulating the VDIs we'll be relinking and
            # rescan the SR first in case the children changed since the last
            # scan
            self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip()
        finally:
            self.unlock()
            # Reload the children to leave things consistent
            vdi.parent._reloadChildren(vdi)

        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
        # vdi's data now lives in the parent; drop the empty node
        self.deleteVDI(vdi)
2426 class CoalesceTracker:
2427 GRACE_ITERATIONS = 2
2428 MAX_ITERATIONS_NO_PROGRESS = 3
2429 MAX_ITERATIONS = 20
2430 MAX_INCREASE_FROM_MINIMUM = 1.2
2431 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2432 " --> Final size {finSize}"
2434 def __init__(self, sr):
2435 self.itsNoProgress = 0
2436 self.its = 0
2437 self.minSize = float("inf")
2438 self._history = []
2439 self.reason = ""
2440 self.startSize = None
2441 self.finishSize = None
2442 self.sr = sr
2443 self.grace_remaining = self.GRACE_ITERATIONS
2445 @property
2446 def history(self):
2447 return [x['msg'] for x in self._history]
2449 def moving_average(self):
2450 """
2451 Calculate a three point moving average
2452 """
2453 assert len(self._history) >= 3
2455 mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history)
2456 util.SMlog(f'Calculated moving average as {mv_average}')
2457 return mv_average
2459 def abortCoalesce(self, prevSize, curSize):
2460 self.its += 1
2461 self._history.append(
2462 {
2463 'finalsize': curSize,
2464 'msg': self.HISTORY_STRING.format(its=self.its,
2465 initSize=prevSize,
2466 finSize=curSize)
2467 }
2468 )
2470 self.finishSize = curSize
2472 if self.startSize is None:
2473 self.startSize = prevSize
2475 if curSize < self.minSize:
2476 self.minSize = curSize
2478 if prevSize < self.minSize:
2479 self.minSize = prevSize
2481 if self.its < 4:
2482 # Perform at least three iterations
2483 return False
2485 if prevSize >= curSize or curSize < self.moving_average():
2486 # We made progress
2487 return False
2488 else:
2489 self.itsNoProgress += 1
2490 Util.log("No progress, attempt:"
2491 " {attempt}".format(attempt=self.itsNoProgress))
2492 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2494 if self.its > self.MAX_ITERATIONS:
2495 max = self.MAX_ITERATIONS
2496 self.reason = \
2497 "Max iterations ({max}) exceeded".format(max=max)
2498 return True
2500 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2501 max = self.MAX_ITERATIONS_NO_PROGRESS
2502 self.reason = \
2503 "No progress made for {max} iterations".format(max=max)
2504 return True
2506 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2507 if curSize > maxSizeFromMin:
2508 self.grace_remaining -= 1
2509 if self.grace_remaining == 0:
2510 self.reason = "Unexpected bump in size," \
2511 " compared to minimum achieved"
2513 return True
2515 return False
2517 def printSizes(self):
2518 Util.log("Starting size was {size}"
2519 .format(size=self.startSize))
2520 Util.log("Final size was {size}"
2521 .format(size=self.finishSize))
2522 Util.log("Minimum size achieved was {size}"
2523 .format(size=self.minSize))
2525 def printReasoning(self):
2526 Util.log("Aborted coalesce")
2527 for hist in self.history:
2528 Util.log(hist)
2529 Util.log(self.reason)
2530 self.printSizes()
2532 def printSummary(self):
2533 if self.its == 0:
2534 return
2536 if self.reason: 2536 ↛ 2537line 2536 didn't jump to line 2537, because the condition on line 2536 was never true
2537 Util.log("Aborted coalesce")
2538 Util.log(self.reason)
2539 else:
2540 Util.log("Coalesce summary")
2542 Util.log(f"Performed {self.its} iterations")
2543 self.printSizes()
    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # shrink the leaf via snapshot-coalesce rounds until it is small
        # enough to be live-coalesced within the permitted downtime
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizeVHD = vdi.getSizeVHD()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)
2562 def calcStorageSpeed(self, startTime, endTime, vhdSize):
2563 speed = None
2564 total_time = endTime - startTime
2565 if total_time > 0:
2566 speed = float(vhdSize) / float(total_time)
2567 return speed
    def writeSpeedToFile(self, speed):
        """Append a coalesce-speed sample (bytes/sec) to this SR's speed log,
        keeping at most N_RUNNING_AVERAGE entries. Serialized via the SR
        lock; the file is rewritten atomically."""
        content = []
        speedFile = None
        path = SPEED_LOG_ROOT.format(uuid=self.uuid)
        self.lock()
        try:
            Util.log("Writing to file: {myfile}".format(myfile=path))
            lines = ""
            if not os.path.isfile(path):
                # first sample: the file will be created by the atomic write
                lines = str(speed) + "\n"
            else:
                speedFile = open(path, "r+")
                content = speedFile.readlines()
                content.append(str(speed) + "\n")
                # bounded running window: drop the oldest sample
                if len(content) > N_RUNNING_AVERAGE:
                    del content[0]
                lines = "".join(content)

            util.atomicFileWrite(path, VAR_RUN, lines)
        finally:
            if speedFile is not None:
                speedFile.close()
            Util.log("Closing file: {myfile}".format(myfile=path))
            self.unlock()
2594 def recordStorageSpeed(self, startTime, endTime, vhdSize):
2595 speed = self.calcStorageSpeed(startTime, endTime, vhdSize)
2596 if speed is None:
2597 return
2599 self.writeSpeedToFile(speed)
    def getStorageSpeed(self):
        """Return the average coalesce speed (bytes/sec) from this SR's
        speed log, or None if the log is missing, empty, or corrupt.
        Serialized via the SR lock."""
        speedFile = None
        path = SPEED_LOG_ROOT.format(uuid=self.uuid)
        self.lock()
        try:
            speed = None
            if os.path.isfile(path):
                speedFile = open(path)
                content = speedFile.readlines()
                try:
                    content = [float(i) for i in content]
                except ValueError:
                    # NOTE: readlines() here re-reads an exhausted file and
                    # logs an empty list, not the bad content
                    Util.log("Something bad in the speed log:{log}".
                             format(log=speedFile.readlines()))
                    return speed

                if len(content):
                    speed = sum(content) / float(len(content))
                    if speed <= 0:
                        # Defensive, should be impossible.
                        Util.log("Bad speed: {speed} calculated for SR: {uuid}".
                                 format(speed=speed, uuid=self.uuid))
                        speed = None
                else:
                    Util.log("Speed file empty for SR: {uuid}".
                             format(uuid=self.uuid))
            else:
                Util.log("Speed log missing for SR: {uuid}".
                         format(uuid=self.uuid))
            return speed
        finally:
            if not (speedFile is None):
                speedFile.close()
            self.unlock()
    def _snapshotCoalesce(self, vdi):
        """One snapshot-coalesce round on leaf vdi: take a single snapshot
        (making the old leaf an internal node) and coalesce that node away.

        Returns False if concurrent SM operations invalidated the step.
        """
        # Note that because we are not holding any locks here, concurrent SM
        # operations may change this tree under our feet. In particular, vdi
        # can be deleted, or it can be snapshotted.
        assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
        Util.log("Single-snapshotting %s" % vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
        try:
            ret = self.xapi.singleSnapshotVDI(vdi)
            Util.log("Single-snapshot returned: %s" % ret)
        except XenAPI.Failure as e:
            if util.isInvalidVDI(e):
                Util.log("The VDI appears to have been concurrently deleted")
                return False
            raise
        self.scanLocked()
        # after the snapshot, vdi's parent is the temporary internal node
        tempSnap = vdi.parent
        if not tempSnap.isCoalesceable():
            Util.log("The VDI appears to have been concurrently snapshotted")
            return False
        Util.log("Coalescing parent %s" % tempSnap)
        util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
        vhdSize = vdi.getSizeVHD()
        self._coalesce(tempSnap)
        if not vdi.isLeafCoalesceable():
            Util.log("The VDI tree appears to have been altered since")
            return False
        return True
    def _liveLeafCoalesce(self, vdi) -> bool:
        """Final leaf-coalesce step under the SR lock with vdi paused.

        Returns False if the VDI was deleted or became non-coalesceable in
        the meantime; True on success.
        """
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            # re-scan and re-validate: the tree may have changed since
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    # attempt recovery before propagating the failure
                    self._handleInterruptedCoalesceLeaf()
                    raise
            finally:
                # re-fetch by uuid: the object was renamed/replaced above
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.ensureUnpaused()
                # remove the leftover renamed old leaf, if still present
                vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                if vdiOld:
                    util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                    self.deleteVDI(vdiOld)
                    util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
        finally:
            self.cleanup()
            self.unlock()
            self.logFilter.logState()
        return True
    def _doCoalesceLeaf(self, vdi):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # JRN_LEAF makes the whole operation recoverable on interruption
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceVHD(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename: move the old leaf out of the way, then give the parent
        # the leaf's identity so clients keep their handle
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        if vdi.parent.raw:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW)
        vdi.parent.delConfig(VDI.DB_VHD_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        if parent.parent is None:
            # parent became a root: its parent pointer must go
            parent.delConfig(VDI.DB_VHD_PARENT)

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        # the parent now carries the leaf's uuid; forget its old record
        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)
2768 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2769 assert(not parent.raw) # raw parents not supported
2770 extra = child.getSizeVHD() - parent.getSizeVHD()
2771 if extra < 0: 2771 ↛ 2772line 2771 didn't jump to line 2772, because the condition on line 2771 was never true
2772 extra = 0
2773 return extra
    def _prepareCoalesceLeaf(self, vdi) -> None:
        # Hook: backend-specific preparation before a leaf-coalesce (no-op here).
        pass

    def _updateNode(self, vdi) -> None:
        # Hook: backend-specific node bookkeeping during leaf-coalesce (no-op here).
        pass

    def _finishCoalesceLeaf(self, parent) -> None:
        # Hook: backend-specific finalization after a leaf-coalesce (no-op here).
        pass

    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        # Hook: refresh slave hosts after an undone leaf-coalesce (no-op here).
        pass

    def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
        # Hook: refresh slave hosts after the leaf-coalesce renames (no-op here).
        pass

    def _updateSlavesOnResize(self, vdi) -> None:
        # Hook: refresh slave hosts after the parent is resized (no-op here).
        pass
2793 def _removeStaleVDIs(self, uuidsPresent) -> None:
2794 for uuid in list(self.vdis.keys()):
2795 if not uuid in uuidsPresent:
2796 Util.log("VDI %s disappeared since last scan" % \
2797 self.vdis[uuid])
2798 del self.vdis[uuid]
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """An interrupted leaf-coalesce operation may leave the VHD tree in an
        inconsistent state. If the old-leaf VDI is still present, we revert the
        operation (in case the original error is persistent); otherwise we must
        finish the operation"""
        # Base implementation: nothing to recover; backends override this.
        pass
    def _buildTree(self, force):
        """Link loaded VDI objects into parent/child trees (fills vdiTrees).

        With force=True a missing parent is logged and the orphan becomes a
        tree root; otherwise a missing parent raises util.SMException.
        """
        self.vdiTrees = []
        for vdi in self.vdis.values():
            if vdi.parentUuid:
                parent = self.getVDI(vdi.parentUuid)
                if not parent:
                    if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
                        # leftover old leaf from an interrupted leaf-coalesce:
                        # expected to be parentless, treat as a root
                        self.vdiTrees.append(vdi)
                        continue
                    if force:
                        Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
                                (vdi.parentUuid, vdi.uuid))
                        self.vdiTrees.append(vdi)
                        continue
                    else:
                        raise util.SMException("Parent VDI %s of %s not " \
                                "found" % (vdi.parentUuid, vdi.uuid))
                vdi.parent = parent
                parent.children.append(vdi)
            else:
                self.vdiTrees.append(vdi)
class FileSR(SR):
    """File-based SR (e.g. ext/NFS): VDIs are VHD/raw files in a directory."""
    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"  # IntelliCache file suffix
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2
    def __init__(self, uuid, xapi, createLock, force):
        """Set up a file SR: mount-point path and file-based journaler."""
        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = "/var/run/sr-mount/%s" % self.uuid
        self.journaler = fjournaler.Journaler(self.path)
    @override
    def scan(self, force=False) -> None:
        """Load/refresh VDI objects from the VHD and raw files on disk,
        rebuild the tree, and recover any interrupted leaf-coalesce."""
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)
        vhds = self._scan(force)
        for uuid, vhdInfo in vhds.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, False)
                self.vdis[uuid] = vdi
            vdi.load(vhdInfo)
        uuidsPresent = list(vhds.keys())
        # raw VDIs are plain files with the raw extension, not VHDs
        rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, True)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()
2870 @override
2871 def getFreeSpace(self) -> int:
2872 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2874 @override
2875 def deleteVDIs(self, vdiList) -> None:
2876 rootDeleted = False
2877 for vdi in vdiList:
2878 if not vdi.parent:
2879 rootDeleted = True
2880 break
2881 SR.deleteVDIs(self, vdiList)
2882 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
2883 self.xapi.markCacheSRsDirty()
    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                # parent-node cache: age out on last access time
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # serialize against cache setup using the leaf's lock when the
            # file belongs to a managed leaf, else the parent's own lock
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved
    def _cleanupCache(self, uuid, action):
        """Remove one cache file (caller holds the cache-setup lock).

        Returns True if the file was deleted; re-checks the VDI record and
        tapdisk activity before destroying anything.
        """
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            # raced with cache re-enable: keep the file
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            # a tapdisk is attached: only REMOVE (not REMOVE_IF_INACTIVE)
            # may shut it down and proceed
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True
2955 def _isCacheFileName(self, name):
2956 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
2957 name.endswith(self.CACHE_FILE_EXT)
    def _scan(self, force):
        """Return {uuid: vhdInfo} for all VHD files in the SR directory,
        retrying up to SCAN_RETRY_ATTEMPTS times on per-VHD errors.

        With force=True the last (possibly erroneous) result is returned
        instead of raising util.SMException.
        """
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD)
            vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid)
            for uuid, vhdInfo in vhds.items():
                if vhdInfo.error:
                    error = True
                    break
            if not error:
                return vhds
            Util.log("Scan error on attempt %d" % i)
        if force:
            return vhds
        raise util.SMException("Scan error")
    @override
    def deleteVDI(self, vdi) -> None:
        # Verify no slave host still has the file open before the base-class
        # deletion actually removes it.
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)
2980 def _checkSlaves(self, vdi):
2981 onlineHosts = self.xapi.getOnlineHosts()
2982 abortFlag = IPCFlag(self.uuid)
2983 for pbdRecord in self.xapi.getAttachedPBDs():
2984 hostRef = pbdRecord["host"]
2985 if hostRef == self.xapi._hostRef:
2986 continue
2987 if abortFlag.test(FLAG_TYPE_ABORT):
2988 raise AbortException("Aborting due to signal")
2989 try:
2990 self._checkSlave(hostRef, vdi)
2991 except util.CommandException:
2992 if hostRef in onlineHosts:
2993 raise
2995 def _checkSlave(self, hostRef, vdi):
2996 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
2997 Util.log("Checking with slave: %s" % repr(call))
2998 _host = self.xapi.session.xenapi.host
2999 text = _host.call_plugin( * call)
    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover leaf-coalesce operations that were interrupted mid-flight.

        For each JRN_LEAF journal entry, inspect which files still exist in
        the SR directory: if the parent VHD/RAW file or the temporary
        renamed child still exists the operation is rolled back, otherwise
        it is rolled forward. The journal entry is cleared either way and
        the VDI is unpaused if we still know about it.
        """
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            # Re-list the directory per entry: earlier undo/finish calls may
            # have renamed or removed files.
            fileList = os.listdir(self.path)
            childName = uuid + vhdutil.FILE_EXTN_VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD
            parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD
            parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()
    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back an interrupted leaf-coalesce: restore the parent and
        child names, re-point the child's VDI record at the parent and fix
        the hidden flags. Fistpoints mark each recovery step for testing.
        """
        Util.log("*** UNDO LEAF-COALESCE")
        # The parent may already have been renamed over by the child; if it
        # is not found under its own uuid, it must still carry the child's.
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        # Similarly, the child may still be under its temporary rename.
        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # Restore visibility: the leaf must be visible, the parent hidden.
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3055 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3056 Util.log("*** FINISH LEAF-COALESCE")
3057 vdi = self.getVDI(childUuid)
3058 if not vdi:
3059 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
3060 raise util.SMException("VDI %s not found" % childUuid)
3061 try:
3062 self.forgetVDI(parentUuid)
3063 except XenAPI.Failure:
3064 Util.logException('_finishInterruptedCoalesceLeaf')
3065 pass
3066 self._updateSlavesOnResize(vdi)
3067 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3068 Util.log("*** finished leaf-coalesce successfully")
class LVHDSR(SR):
    """Cleanup/GC view of an LVHD SR: VDIs live in LVM logical volumes
    inside a volume group named from lvhdutil.VG_PREFIX plus the SR uuid.
    LV activation is tracked through an LVActivator and coalesce
    operations are journaled in the VG itself."""

    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid)
        self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName)

        # Optional LVM configuration may be supplied through the SR's
        # other-config ('lvm-conf' key).
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

    @override
    def deleteVDI(self, vdi) -> None:
        """Deactivate the VDI's LV if this GC activated it, verify slaves
        no longer use it, then delete via the base class."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in XAPI and also drop it from the SR's on-disk
        LVM metadata volume."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

    @override
    def getFreeSpace(self) -> int:
        """Free space in the VG: physical size minus physical utilisation."""
        stats = lvutil._getVGstats(self.vgName)
        return stats['physical_size'] - stats['physical_utilisation']

    @override
    def cleanup(self):
        """Deactivate every LV this GC run activated (best effort)."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

    @override
    def needUpdateBlockInfo(self) -> bool:
        """True if any scannable, non-raw, non-leaf VDI lacks a cached
        DB_VHD_BLOCKS bitmap."""
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                return True
        return False

    @override
    def updateBlockInfo(self) -> None:
        """Populate the DB_VHD_BLOCKS bitmap for VDIs missing it."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

    @override
    def scan(self, force=False) -> None:
        """Rescan the VG, refresh the VDI set and tree, then recover any
        interrupted leaf-coalesce operations."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVHDVDI(self, uuid,
                        vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    def _scan(self, force):
        """Collect VDI info from the LVM cache, retrying on scan errors.
        With 'force', return the last (possibly erroneous) result instead
        of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = lvhdutil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Drop in-memory VDIs that the latest scan no longer reports,
        releasing any LV activation we held for them."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain, make the parent LV writable, and size the
        LVs for the coalesce (deflate child, inflate parent)."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

    @override
    def _updateNode(self, vdi) -> None:
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = lvhdutil.NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Leave the surviving node inflated if it is (or may be) written
        to; otherwise deflate it to save space."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Extra VG space needed to inflate the parent to hold the child."""
        return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover interrupted leaf-coalesces: decide per journal entry,
        based on which LVs still exist, whether to roll back (parent or
        temporary child LV present) or roll forward, then clear the entry."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid
            tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                    self.TMP_RENAME_PREFIX + uuid
            parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid
            parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid
            parentPresent = (self.lvmCache.checkLV(parentLV1) or \
                    self.lvmCache.checkLV(parentLV2))
            if parentPresent or self.lvmCache.checkLV(tmpChildLV):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back an interrupted leaf-coalesce: restore LV names and the
        VDI record, repair refcounts, restore LV sizes and hidden flags,
        then refresh slaves. Fistpoints mark each step for testing."""
        Util.log("*** UNDO LEAF-COALESCE")
        # The parent may already have been renamed over by the child; if it
        # is not found under its own uuid, it must still carry the child's.
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        # Similarly, the child may still be under its temporary rename.
        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

            # refcount (best effort - assume that it had succeeded if the
            # second rename succeeded; if not, this adjustment will be wrong,
            # leading to a non-deactivation of the LV)
            ns = lvhdutil.NS_PREFIX_LVM + self.uuid
            cCnt, cBcnt = RefCounter.check(child.uuid, ns)
            pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
            pCnt = pCnt + cBcnt
            RefCounter.set(parent.uuid, pCnt, 0, ns)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        # Restore the LV sizes and visibility of both nodes.
        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll an interrupted leaf-coalesce forward: re-inflate the leaf,
        forget the stale parent record (best effort) and refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Best effort: the parent record may already be gone.
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                # Ignore failures from hosts XAPI considers offline.
                if hostRef in onlineHosts:
                    raise

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """After a leaf-coalesce undo, deactivate stale LV names and refresh
        the restored child/parent LVs on every slave with the child attached."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """After renaming a VDI's LV, deactivate the old name, refresh the
        new one and clean up the old parent's lock/refcount on each slave
        with the VDI attached."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """Refresh the resized LV on every slave that has any leaf of this
        VDI's subtree attached."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName,
                vdi.fileName, vdi.uuid, slaves)
class LinstorSR(SR):
    """Cleanup/GC view of a LINSTOR-backed SR: VDIs are DRBD/LINSTOR
    volumes managed through LinstorVolumeManager, with journaling via
    LinstorJournaler. Only usable when the LINSTOR libraries imported at
    module load time are available."""

    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH
        # Only the journaler is needed up front; the full volume manager is
        # (re)created on each scan by _reloadLinstor().
        self._reloadLinstor(journaler_only=True)

    @override
    def deleteVDI(self, vdi) -> None:
        """Verify no host still has the volume open, then delete it."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def getFreeSpace(self) -> int:
        """Maximum volume size LINSTOR currently allows on this SR."""
        return self._linstor.max_volume_size_allowed

    @override
    def scan(self, force=False) -> None:
        """Rescan the LINSTOR volumes, refresh the VDI set and tree, then
        recover any interrupted leaf-coalesce operations."""
        all_vdi_info = self._scan(force)
        for uuid, vdiInfo in all_vdi_info.items():
            # When vdiInfo is None, the VDI is RAW.
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LinstorVDI(self, uuid, not vdiInfo)
                self.vdis[uuid] = vdi
            if vdiInfo:
                vdi.load(vdiInfo)
        self._removeStaleVDIs(all_vdi_info.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def pauseVDIs(self, vdiList) -> None:
        """Wait until none of the volumes are LINSTOR-locked, then pause."""
        self._linstor.ensure_volume_list_is_not_locked(
            vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorSR, self).pauseVDIs(vdiList)

    def _reloadLinstor(self, journaler_only=False):
        """(Re)connect to the LINSTOR controller using the group name from
        this host's PBD device-config; rebuild the journaler and, unless
        journaler_only, the volume manager and VHD util wrappers."""
        session = self.xapi.session
        host_ref = util.get_this_host_ref(session)
        sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)

        pbd = util.find_my_pbd(session, host_ref, sr_ref)
        if pbd is None:
            raise util.SMException('Failed to find PBD')

        dconf = session.xenapi.PBD.get_device_config(pbd)
        group_name = dconf['group-name']

        controller_uri = get_controller_uri()
        self.journaler = LinstorJournaler(
            controller_uri, group_name, logger=util.SMlog
        )

        if journaler_only:
            return

        self._linstor = LinstorVolumeManager(
            controller_uri,
            group_name,
            repair=True,
            logger=util.SMlog
        )
        self._vhdutil = LinstorVhdUtil(session, self._linstor)

    def _scan(self, force):
        """Load VDI info from LINSTOR, retrying on per-VDI errors or
        exceptions. With 'force', return the last result instead of
        raising after SR.SCAN_RETRY_ATTEMPTS failures."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            # Reconnect each attempt in case the controller moved.
            self._reloadLinstor()
            error = False
            try:
                all_vdi_info = self._load_vdi_info()
                for uuid, vdiInfo in all_vdi_info.items():
                    if vdiInfo and vdiInfo.error:
                        error = True
                        break
                if not error:
                    return all_vdi_info
                Util.log('Scan error, retrying ({})'.format(i))
            except Exception as e:
                Util.log('Scan exception, retrying ({}): {}'.format(i, e))
                Util.log(traceback.format_exc())

        if force:
            return all_vdi_info
        raise util.SMException('Scan error')

    def _load_vdi_info(self):
        """Build a {vdi_uuid: vhdInfo-or-None} map from LINSTOR volume and
        metadata queries. None marks a RAW volume; a vhdInfo with .error
        set marks a VDI whose info could not be loaded. Volumes prefixed
        'DELETED_' are destroyed on the spot and omitted from the map."""
        all_vdi_info = {}

        # TODO: Ensure metadata contains the right info.

        all_volume_info = self._linstor.get_volumes_with_info()
        volumes_metadata = self._linstor.get_volumes_with_metadata()
        for vdi_uuid, volume_info in all_volume_info.items():
            try:
                volume_metadata = volumes_metadata[vdi_uuid]
                if not volume_info.name and not list(volume_metadata.items()):
                    continue  # Ignore it, probably deleted.

                if vdi_uuid.startswith('DELETED_'):
                    # Assume it's really a RAW volume of a failed snap without VHD header/footer.
                    # We must remove this VDI now without adding it in the VDI list.
                    # Otherwise `Relinking` calls and other actions can be launched on it.
                    # We don't want that...
                    Util.log('Deleting bad VDI {}'.format(vdi_uuid))

                    self.lock()
                    try:
                        self._linstor.destroy_volume(vdi_uuid)
                        try:
                            self.forgetVDI(vdi_uuid)
                        except:
                            # Best effort: XAPI may not know this VDI.
                            pass
                    except Exception as e:
                        Util.log('Cannot delete bad VDI: {}'.format(e))
                    finally:
                        self.unlock()
                    continue

                vdi_type = volume_metadata.get(VDI_TYPE_TAG)
                volume_name = self._linstor.get_volume_name(vdi_uuid)
                if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
                    # Always RAW!
                    info = None
                elif vdi_type == vhdutil.VDI_TYPE_VHD:
                    info = self._vhdutil.get_vhd_info(vdi_uuid)
                else:
                    # Ensure it's not a VHD...
                    try:
                        info = self._vhdutil.get_vhd_info(vdi_uuid)
                    except:
                        # Could be a damaged VHD: try one repair pass
                        # before concluding the volume is RAW.
                        try:
                            self._vhdutil.force_repair(
                                self._linstor.get_device_path(vdi_uuid)
                            )
                            info = self._vhdutil.get_vhd_info(vdi_uuid)
                        except:
                            info = None

            except Exception as e:
                Util.log(
                    ' [VDI {}: failed to load VDI info]: {}'
                    .format(vdi_uuid, e)
                )
                info = vhdutil.VHDInfo(vdi_uuid)
                info.error = 1

            all_vdi_info[vdi_uuid] = info

        return all_vdi_info

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and size the volumes for the coalesce."""
        vdi._activateChain()
        vdi.deflate()
        vdi._inflateParentForCoalesce()

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Leave the surviving node inflated if it is (or may be) written
        to; otherwise deflate it to save space."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Extra space needed to grow the parent volume for the coalesce."""
        return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize()

    def _hasValidDevicePath(self, uuid):
        """True iff LINSTOR can resolve a device path for this volume."""
        try:
            self._linstor.get_device_path(uuid)
        except Exception:
            # TODO: Maybe log exception.
            return False
        return True

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """Run the base live leaf-coalesce under the SR lock, after making
        sure the volume is not LINSTOR-locked."""
        self.lock()
        try:
            self._linstor.ensure_volume_is_not_locked(
                vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
            )
            return super(LinstorSR, self)._liveLeafCoalesce(vdi)
        finally:
            self.unlock()

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover interrupted leaf-coalesces: roll back if the parent or
        temporary child volume still has a device path, roll forward
        otherwise, then clear the journal entry."""
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back an interrupted leaf-coalesce: restore volume names,
        re-point the child's VDI record at the parent and fix hidden flags."""
        Util.log('*** UNDO LEAF-COALESCE')
        # The parent may already have been renamed over by the child; if it
        # is not found under its own uuid, it must still carry the child's.
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        # Similarly, the child may still be under its temporary rename.
        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)

        # TODO: Maybe deflate here.

        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll an interrupted leaf-coalesce forward: forget the stale
        parent record (best effort) and refresh slaves."""
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Best effort: the parent record may already be gone.
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

    def _checkSlaves(self, vdi):
        """Raise if any LINSTOR opener other than tapdisk has the volume
        open on any host; a missing volume is treated as not in use."""
        try:
            all_openers = self._linstor.get_volume_openers(vdi.uuid)
            for openers in all_openers.values():
                for opener in openers.values():
                    if opener['process-name'] != 'tapdisk':
                        raise util.SMException(
                            'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
                        )
        except LinstorVolumeManagerError as e:
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                raise
3671################################################################################
3672#
3673# Helpers
3674#
def daemonize():
    """Standard double-fork daemonization.

    Returns False in the original (parent) process — after reaping the
    intermediate child — and True in the final daemon process, which has
    been detached via setsid() and had fds 0-2 redirected to /dev/null.
    """
    pid = os.fork()
    if pid:
        # Parent: wait for the intermediate child to exit, then carry on.
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    os.chdir("/")
    os.setsid()
    # Second fork so the daemon is not a session leader.
    pid = os.fork()
    if pid:
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True
def normalizeType(type):
    """Map an SR driver type string onto a generic cleanup SR type
    (SR.TYPE_LVHD, SR.TYPE_FILE or SR.TYPE_LINSTOR).

    Raises util.SMException for types cleanup does not support.
    """
    # temporary while LVHD is symlinked as LVM: the "lvm*" names are aliases
    lvhdAliases = LVHDSR.SUBTYPES + ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]
    fileTypes = [
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock"
    ]
    if type in lvhdAliases:
        type = SR.TYPE_LVHD
    elif type in fileTypes:
        type = SR.TYPE_FILE
    elif type == "linstor":
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type
# Quiet period (seconds) _gcLoopPause sleeps before GC starts real work.
GCPAUSE_DEFAULT_SLEEP = 5 * 60
def _gc_init_file(sr_uuid):
    """Path of the per-SR 'gc_init' marker file."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')
def _create_init_file(sr_uuid):
    """Create the per-SR 'gc_init' marker file, making its directory first.

    Uses _gc_init_file() for the path so the file written here can never
    drift from the path that _gcLoopPause() checks.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')
def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Optionally wait a quiet period before GC starts doing real work.

    Skipped entirely when 'immediate' is set. Otherwise, if the GCPAUSE
    fistpoint is active its custom function runs instead of the sleep;
    failing that, if the SR's gc_init marker exists, sleep for
    GCPAUSE_DEFAULT_SLEEP in an abortable wrapper so a pending abort
    flag can cut the wait short.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.

    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):

        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
    elif os.path.exists(_gc_init_file(sr.uuid)):
        def abortTest():
            return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

        # If time.sleep hangs we are in deep trouble, however for
        # completeness we set the timeout of the abort thread to
        # 110% of GCPAUSE_DEFAULT_SLEEP.
        Util.log("GC active, about to go quiet")
        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                          None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                          GCPAUSE_DEFAULT_SLEEP * 1.1)
        Util.log("GC active, quiet period ended")
def _gcLoop(sr, dryRun=False, immediate=False):
    """The main GC work loop for one SR.

    Holds lockGCActive for the whole run (bailing out if another GC
    instance already holds it) and takes lockGCRunning around each work
    iteration. Each iteration rescans the SR, garbage-collects orphans,
    then performs at most one coalesce or leaf-coalesce before looping.
    The loop ends when there is no work left, the SR is unplugged, GC is
    disabled, or SIGTERM was received. Progress is reported through a
    XAPI task whose final status reflects success or failure.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        Util.log("SR no longer attached, exiting")
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                # First, remove garbage (orphaned) VDIs under the SR lock.
                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # Then do at most one (non-leaf) coalesce per iteration...
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                # ...or one leaf coalesce, before rescanning.
                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue
            finally:
                lockGCRunning.release()
    except:
        # Intentionally broad: mark the XAPI task failed, then re-raise.
        task_status = "failure"
        raise
    finally:
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()
def _gc(session, srUuid, dryRun=False, immediate=False):
    """Run the GC/coalesce loop for one SR, then always clean up.

    immediate: passed to _gcLoop to skip the initial quiet period.
    """
    init(srUuid)
    sr = SR.getInstance(srUuid, session)
    if not sr.gcEnabled(False):
        return

    try:
        _gcLoop(sr, dryRun, immediate=immediate)
    finally:
        # Unconditional teardown: report no-space candidates, release LVs,
        # flush the log filter and drop the XAPI wrapper.
        sr.check_no_space_candidates()
        sr.cleanup()
        sr.logFilter.logState()
        del sr.xapi
def _abort(srUuid, soft=False):
    """Aborts an GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        # Another process holds the GC-active lock: signal it to abort and
        # poll for the lock until it lets go.
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True
def init(srUuid):
    """Lazily create the module-level GC locks for this SR (idempotent)."""
    global lockGCRunning
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    global lockGCActive
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)
class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """
    def __init__(self, srUuid):
        # The GC-active lock proper, plus the SR-wide lock used only as a
        # gate around acquisition attempts.
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock; returns True on success.

        The SR lock is taken (blocking) first and released immediately
        after the attempt, so we never grab the GC-active lock while
        another process holds the SR lock."""
        self._srLock.acquire()

        try:
            return self._lock.acquireNoblock()
        finally:
            # SR lock is only held for the duration of the attempt.
            self._srLock.release()

    def release(self):
        # Only the GC-active lock is held between acquire/release; the SR
        # lock never outlives acquireNoblock().
        self._lock.release()
def usage():
    """Print CLI help to stdout, log the invalid invocation, and exit(1).

    Bug fix: the help previously advertised ``--cache-clean`` but getopt
    and main() only accept ``--clean_cache``; the text now matches the
    accepted option.
    """
    output = """Garbage collect and/or coalesce VHDs in a VHD-based SR

Parameters:
    -u --uuid UUID  SR UUID
 and one of:
    -g --gc         garbage collect, coalesce, and repeat while there is work
    -G --gc_force   garbage collect once, aborting any current operations
    -c --clean_cache <max_age> clean up IntelliCache cache files older than
                    max_age hours
    -a --abort      abort any currently running operation (GC or coalesce)
    -q --query      query the current state (GC'ing, coalescing or not running)
    -x --disable    disable GC/coalesce (will be in effect until you exit)
    -t --debug      see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force      continue in the presence of VHDs with errors (when doing
                    GC, this might cause removal of any such VHDs) (only valid
                    for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes.  ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
    "deflate".
    -v --vdi_uuid   VDI UUID
    """
    #-d --dry-run (debug mode) don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)
3962##############################################################################
3963#
3964# API
3965#
def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.

    Returns True after stopping the GC service and dropping the process
    lock; returns False when soft=True and an abort was already pending.
    """
    if not _abort(srUuid, soft):
        return False
    stop_gc_service(srUuid)
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True
def run_gc(session, srUuid, dryRun, immediate=False):
    """Run one GC pass and translate the outcome into an exit code.

    Returns 0 on success, 2 if the run was aborted, 1 on any other error
    (which is logged rather than propagated).
    """
    try:
        _gc(session, srUuid, dryRun, immediate=immediate)
    except AbortException:
        Util.log("Aborted")
        return 2
    except Exception:
        Util.logException("gc")
        Util.log("* * * * * SR %s: ERROR\n" % srUuid)
        return 1
    return 0
def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    # Allow a clean SIGTERM shutdown of the (possibly backgrounded) GC.
    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            # NOTE(review): the child deliberately passes session=None —
            # presumably the parent's session is not usable after the fork;
            # confirm against daemonize()'s contract.
            run_gc(None, srUuid, dryRun)
            os._exit(0)
    else:
        # Foreground: the exit code of this process is the GC result
        # (os._exit skips normal interpreter shutdown).
        os._exit(run_gc(session, srUuid, dryRun, immediate=True))
def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # Cheap optimization: skip the fork when a GC instance already runs.
    # The forked process re-checks the lock anyway, so this is best-effort.
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if lockRunning.acquireNoblock():
        lockRunning.release()
    else:
        if not should_preempt(session, sr_uuid):
            util.SMlog("A GC instance already running, not kicking")
            return
        util.SMlog("Aborting currently-running coalesce of garbage VDI")
        try:
            if not abort(sr_uuid, soft=True):
                util.SMlog("The GC has already been scheduled to re-start")
        except util.CommandException as e:
            if e.code != errno.ETIMEDOUT:
                raise
            util.SMlog('failed to abort the GC')

    util.SMlog(f"Starting GC file is {__file__}")
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                   close_fds=True)
def _gc_service_cmd(sr_uuid, action, extra_args=None):
    """Run `systemctl <action>` against the per-SR GC unit via util.doexec.

    Dashes in the UUID are escaped as \\x2d to form a valid systemd
    instance name.  Returns whatever util.doexec returns.
    """
    unit = "SMGC@" + sr_uuid.replace("-", "\\x2d")
    cmd = ["/usr/bin/systemctl", "--quiet"]
    if extra_args:
        cmd.extend(extra_args)
    cmd.append(action)
    cmd.append(unit)
    return util.doexec(cmd)
def start_gc_service(sr_uuid, wait=False):
    """Kick the templated systemd unit that runs GC on the given SR UUID.

    A no-op if the unit is already started.  Because the service is a
    one-shot with RemainAfterExit=no, wait=True omits --no-block so the
    systemctl call is synchronous and only returns once the run finishes
    — used to force a GC run rather than just scheduling one.
    """
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    extra = None if wait else ["--no-block"]
    _gc_service_cmd(sr_uuid, "start", extra_args=extra)
def stop_gc_service(sr_uuid):
    """Stop the templated systemd GC unit for this SR, logging any failure."""
    util.SMlog(f"Stopping SMGC@{sr_uuid}...")
    rc, _stdout, stderr = _gc_service_cmd(sr_uuid, "stop")
    if rc == 0:
        return
    util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`")
def wait_for_completion(sr_uuid):
    """Block until the GC service for this SR is no longer active.

    Polls get_state() every 5 seconds.
    """
    while True:
        if not get_state(sr_uuid):
            return
        time.sleep(5)
def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    # If another GC/coalesce holds the active lock, abort it first.
    # NOTE(review): abort() releases lockGCActive itself on success, yet
    # the unconditional release in the finally block below runs on this
    # path too — confirm the lock implementation tolerates a release
    # without a matching hold.
    if not lockGCActive.acquireNoblock():
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are VHD errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        # Always release resources and the active lock, even if GC failed.
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()
def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd
    for the state of the templated SMGC service and returns True when it is
    "activating" or "running" (the latter included for completeness; in
    practice the one-shot never reaches it).
    """
    unit = "SMGC@" + srUuid.replace("-", "\\x2d")
    result = subprocess.run(["/usr/bin/systemctl", "is-active", unit],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            close_fds=True)
    state = result.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")
def should_preempt(session, srUuid):
    """Return True when the single in-flight coalesce target is itself
    garbage, i.e. aborting the coalesce would let GC reclaim it.

    Raises util.SMException if more than one coalesce journal entry exists.
    """
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if not entries:
        return False
    if len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())
def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of vdiUuids that are leaf-coalesceable on this SR.

    Raises util.SMException if any of the given VDIs cannot be found.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    result = []
    for candidate in vdiUuids:
        vdi = sr.getVDI(candidate)
        if not vdi:
            raise util.SMException("VDI %s not found" % candidate)
        if vdi.isLeafCoalesceable():
            result.append(candidate)
    return result
def cache_cleanup(session, srUuid, maxAge):
    """Clean up IntelliCache cache files older than maxAge hours on the SR."""
    return SR.getInstance(srUuid, session).cleanupCache(maxAge)
def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug command on an LVHD VDI (see usage(): activate,
    deactivate, inflate, deflate).  ** NEVER USE IT ON A LIVE VM **

    Prints errors and returns early if the SR is not LVHD or the VDI is
    not found; otherwise prints the VDI state before and after.
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVHDSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the UUID was never interpolated (the literal "%s" was
        # printed); supply it so the message is useful.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = lvhdutil.NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)
def abort_optional_reenable(uuid):
    """Disable GC/coalesce for the SR until the operator presses enter,
    then re-enable it by dropping the locks held meanwhile."""
    print("Disabling GC/coalesce for %s" % uuid)
    # _abort() returns True holding lockGCActive (see its docstring).
    ret = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    # NOTE(review): lockGCRunning is released unconditionally although
    # _abort() does not visibly acquire it — confirm init()/_abort leave
    # it held on this path.
    lockGCRunning.release()
    if ret:
        lockGCActive.release()
4210##############################################################################
4211#
4212# CLI
4213#
def main():
    """CLI entry point: parse options, validate the combination, and
    dispatch to the requested GC action."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt in ("-g", "--gc"):
            action = "gc"
        elif opt in ("-G", "--gc_force"):
            action = "gc_force"
        elif opt in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(arg)
        elif opt in ("-a", "--abort"):
            action = "abort"
        elif opt in ("-q", "--query"):
            action = "query"
        elif opt in ("-x", "--disable"):
            action = "disable"
        elif opt in ("-u", "--uuid"):
            uuid = arg
        elif opt in ("-b", "--background"):
            background = True
        elif opt in ("-f", "--force"):
            force = True
        elif opt in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        elif opt in ("-t", "--debug"):
            action = "debug"
            debug_cmd = arg
        elif opt in ("-v", "--vdi_uuid"):
            vdi_uuid = arg

    if not action or not uuid:
        usage()
    # --debug requires both a command and a VDI UUID; no other action
    # accepts either of them.
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action not in ("query", "debug"):
        print("All output goes to log")

    dispatch = {
        "gc": lambda: gc(None, uuid, background, dryRun),
        "gc_force": lambda: gc_force(None, uuid, force, dryRun, True),
        "clean_cache": lambda: cache_cleanup(None, uuid, maxAge),
        "abort": lambda: abort(uuid),
        "query": lambda: print("Currently running: %s" % get_state(uuid)),
        "disable": lambda: abort_optional_reenable(uuid),
        "debug": lambda: debug(uuid, debug_cmd, vdi_uuid),
    }
    dispatch[action]()
# Script entry point when invoked directly (e.g. by start_gc's fork or the
# SMGC@ systemd unit).
if __name__ == '__main__':
    main()