Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SR's in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil, MultiLinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

# NOTE(review): the comment below describes leaf-coalescing as disabled, but
# the flag is set to True — the comment appears to predate the current value;
# confirm which is intended.
# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# SR sm-config keys used to rate-limit coalesce error messages (see
# DEFAULT_COALESCE_ERR_RATE above).
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
# Per-SR file recording observed coalesce speed samples.
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# Number of samples kept when computing the running average coalesce speed.
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
# Set to True by receiveSignal() on SIGTERM; polled by long-running loops
# (e.g. Util.runAbortable) so they can abort promptly.
SIGTERM = False

104 

105 

class AbortException(util.SMException):
    """Raised when a GC/coalesce operation must abort (abort flag or signal)."""
    pass

108 

class CancelException(util.SMException):
    """Raised to cancel an in-progress operation (see VDI._cancel_exception)."""
    pass

111 

def receiveSignal(signalNumber, frame):
    """SIGTERM handler: record that termination was requested.

    Sets the module-level SIGTERM flag, which long-running loops (e.g.
    Util.runAbortable) poll so they can abort cleanly. The signal number
    and frame arguments are required by the signal API but unused.
    """
    global SIGTERM

    util.SMlog("GC: received SIGTERM")
    SIGTERM = True

118 

119 

120################################################################################ 

121# 

122# Util 

123# 

class Util:
    """Miscellaneous helpers: logging, subprocess execution, abortable
    child-process operations and block-bitmap arithmetic."""

    # doexec() return-value selectors
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    # binary size prefixes used by num2str()
    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log one line under the "SMGC" identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the exception currently being handled, prefixed with *tag*.

        SystemExit is not logged: it is converted into a clean exit instead.
        """
        info = sys.exc_info()
        if info[0] is SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        """Execute a subprocess, then return its return code, stdout or stderr.

        *args* is passed to the shell (shell=True). Raises
        util.CommandException when the return code is not in *expectedRC*
        (an int or a list of ints). *ret* selects which value to return
        (RET_RC / RET_STDERR / default stdout).
        """
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        # NOTE(review): str() on bytes yields "b'...'"; callers seem to rely
        # only on substring checks — confirm before changing to .decode().
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL):
        """Execute *func* in a forked child process and kill it (with *prefSig*)
        if *abortTest* signals so, SIGTERM was received, or *timeOut* elapses.

        Success is defined as func() == ret; result is communicated back via
        IPCFlag("success"/"failure") in namespace *ns*. Raises AbortException
        on abort and util.SMException on child failure or timeout.
        """
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for the child's result or an abort condition
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log(" Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        # the child called setpgrp(), so killpg(pid) reaches
                        # the child and anything it spawned
                        os.killpg(pid, prefSig)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, prefSig)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # reap the child; give it up to ~20s to exit
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: run func and report the outcome via the IPC flags
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Format *number* with a binary-prefix suffix (G/M/K), 3 decimals."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in the non-negative integer *val*."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """Return bit count in the bitmap produced by ORing the two bitmaps.

        The bitmaps (byte sequences) may have different lengths; the shorter
        one is treated as zero-padded. Fix: the original indexed the loop
        variable of the first loop after it, raising NameError when the
        shorter bitmap was empty.
        """
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenShort = min(len1, len2)
        if len2 > len1:
            lenLong = len2
            bitmapLong = bitmap2
        else:
            lenLong = len1
            bitmapLong = bitmap1

        count = 0
        # overlapping region: OR byte-by-byte
        for i in range(lenShort):
            count += Util.numBits(bitmap1[i] | bitmap2[i])
        # tail of the longer bitmap
        for i in range(lenShort, lenLong):
            count += Util.numBits(bitmapLong[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the absolute path of this script (.py even if run as .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript

278 

279 

280################################################################################ 

281# 

282# XAPI 

283# 

class XAPI:
    """Thin wrapper around a XenAPI session, scoped to a single SR.

    Provides SR/VDI/host lookups, per-VDI config accessors and XAPI task
    progress reporting for the GC/coalesce process.
    """
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # config-field kinds; CONFIG_NAME maps them to their XAPI field names
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        # NOTE: intentionally scoped inside XAPI; shadows the builtin
        # LookupError only when referenced as XAPI.LookupError.
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XenAPI session logged in as root."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Bind to SR *srUuid*; create a private session if *session* is None."""
        # sessionPrivate: True when we created the session and must log out
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        # XAPI task (created on demand) and counters feeding its progress bar
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        """Opaque XAPI ref of the SR this object is bound to."""
        return self._srRef

    def isPluggedHere(self):
        """Return True if this SR has an attached PBD on the local host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """Return True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """Return True if this host "owns" the SR: the pool master for a
        shared SR, or the single attached host for a local SR.

        Raises util.SMException if a local SR is not attached exactly once.
        """
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        """Return the refs of all live hosts in the pool."""
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Run the on-slave "multi" plugin on *hostRef* with *args*."""
        text = self.session.xenapi.host.call_plugin( \
                hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        """Return the full host record for *hostRef*."""
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        """Resolve a VDI UUID to its XAPI ref (raises XenAPI.Failure if absent)."""
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        """Resolve a VDI object (by its uuid attribute) to its XAPI ref."""
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for *uuid*, or None if the VDI is unknown."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an "internal"-typed snapshot of *vdi*; return the new ref."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        # NOTE: srUuid is unused here but kept for interface compatibility
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the config container holding *key* for *vdi*.

        Returns a dict for sm-config/other-config kinds, or the scalar value
        for on-boot/allow-caching kinds (see CONFIG_NAME).
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove *key* from the sm-config or other-config of *vdi*.

        Only dictionary-backed kinds are supported (asserts otherwise).
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add *key*=*val* to the sm-config or other-config of *vdi*.

        Only dictionary-backed kinds are supported (asserts otherwise).
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        """Return True if XAPI marks *vdi* as a snapshot."""
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Set the dirty flag on every SR that has local caching enabled."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
                'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Kick off an async SR.update and wait up to ~60s for it to finish.

        Cancels the task if the GC abort flag is raised or the wait times out.
        """
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        # best-effort cancel: ignore any failure
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            # always destroy the task handle we created
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Push the current progress counters into the XAPI task record."""
        self.session.xenapi.task.set_other_config(
                self.task,
                {
                    "applies_to": self._srRef
                })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                    self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create the XAPI task used to report GC progress."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Update one progress counter ("coalescable" or "done") and refresh."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        """Set the status of the GC task, if one was created."""
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)

498 

499 

500################################################################################ 

501# 

502# VDI 

503# 

class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202  # xvd* block-device major number

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each config key to the XAPI config field kind that stores it
    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4  # indent per tree level in string dumps

557 

    def __init__(self, sr, uuid, vdi_type):
        """Create an in-memory VDI node for SR *sr* with the given uuid/type."""
        self.sr = sr
        # scanError stays True until a successful load() marks the VDI valid
        self.scanError = True
        self.uuid = uuid
        self.vdi_type = vdi_type
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1          # -1 means "unknown" until loaded
        self._sizePhys = -1
        self._sizeAllocated = -1
        self._hidden = False
        # tree links, filled in by the SR scan
        self.parent = None
        self.children = []
        self._vdiRef = None         # lazily-resolved XAPI ref (see getRef)
        self.cowutil = getCowUtil(vdi_type)
        self._clearRef()

574 

    @staticmethod
    def extractUuid(path):
        """Return the VDI UUID encoded in *path* (implemented by subclasses)."""
        raise NotImplementedError("Implement in sub class")

578 

    def load(self, info=None) -> None:
        """Load VDI info"""
        # no-op in the base class; subclasses populate size/parent/etc. here
        pass

582 

    def getDriverName(self) -> str:
        """Return the tapdisk driver name for this VDI (its vdi_type)."""
        return self.vdi_type

585 

586 def getRef(self): 

587 if self._vdiRef is None: 

588 self._vdiRef = self.sr.xapi.getRefVDI(self) 

589 return self._vdiRef 

590 

    def getConfig(self, key, default=None):
        """Return the value of config *key* for this VDI, or *default*.

        on-boot and allow-caching are scalar XAPI fields, so the raw value is
        used directly; all other keys index into a config dictionary. Note
        that any falsy value (e.g. "" or False) is treated as absent and
        *default* is returned instead.
        """
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

600 

    def setConfig(self, key, val):
        """Set config *key* to *val* (remove-then-add, since XAPI add fails on
        an existing key)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

605 

    def delConfig(self, key):
        """Remove config *key* from this VDI."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

609 

    def ensureUnpaused(self):
        """Unpause the VDI if its config says it is currently paused."""
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

614 

    def pause(self, failfast=False) -> None:
        """Pause the tapdisk for this VDI; raise util.SMException on failure."""
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

619 

    def _report_tapdisk_unpause_error(self):
        """Best-effort: raise a XenCenter message about a failed tapdisk
        unpause (failures to create the message are only logged)."""
        try:
            xapi = self.sr.xapi.session.xenapi
            # NOTE(review): sr_ref is looked up but unused; message.create
            # takes the SR uuid directly
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

631 

    def unpause(self):
        """Unpause the tapdisk; report to XenCenter and raise on failure."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

637 

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        # Serialized under the SR lock. If the VDI vanished underneath us and
        # ignoreNonexistent is set, the refresh is silently skipped.
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                        self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

654 

    def isSnapshot(self):
        """Return True if XAPI marks this VDI as a snapshot."""
        return self.sr.xapi.isSnapshot(self)

657 

    def isAttachedRW(self):
        """Return True if the VDI is currently attached read-write anywhere."""
        return util.is_attached_rw(
                self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

661 

662 def getVDIBlocks(self): 

663 val = self.updateBlockInfo() 

664 bitmap = zlib.decompress(base64.b64decode(val)) 

665 return bitmap 

666 

    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        # NOTE: the and-chain can return a falsy non-bool (e.g. None when
        # self.parent is None); callers rely only on truthiness.
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                self.isHidden() and \
                len(self.children) > 0

674 

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        # NOTE: like isCoalesceable, may return a falsy non-bool; callers
        # rely only on truthiness.
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                not self.isHidden() and \
                len(self.children) == 0

682 

683 def canLiveCoalesce(self, speed): 

684 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

685 isLeafCoalesceable() already""" 

686 feasibleSize = False 

687 allowedDownTime = \ 

688 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

689 allocated_size = self.getAllocatedSize() 

690 if speed: 

691 feasibleSize = \ 

692 allocated_size // speed < allowedDownTime 

693 else: 

694 feasibleSize = \ 

695 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

696 

697 return (feasibleSize or 

698 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

699 

    def getAllPrunable(self):
        """Return the VDIs in this subtree that can be garbage collected.

        A hidden leaf (with no pending relink journal entry) is prunable;
        an inner node is prunable only when hidden and all of its children
        are prunable too.
        """
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all childs are hidden BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]         *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        # NOTE(review): the guard below checks "not hidden" while the comment
        # above says the VDI must be hidden — confirm which is intended.
        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

735 

    def getSizePhys(self) -> int:
        """Return the physical size in bytes (-1 if not yet loaded)."""
        return self._sizePhys

738 

    def getAllocatedSize(self) -> int:
        """Return the allocated size in bytes (-1 if not yet loaded)."""
        return self._sizeAllocated

741 

742 def getTreeRoot(self): 

743 "Get the root of the tree that self belongs to" 

744 root = self 

745 while root.parent: 

746 root = root.parent 

747 return root 

748 

749 def getTreeHeight(self): 

750 "Get the height of the subtree rooted at self" 

751 if len(self.children) == 0: 

752 return 1 

753 

754 maxChildHeight = 0 

755 for child in self.children: 

756 childHeight = child.getTreeHeight() 

757 if childHeight > maxChildHeight: 

758 maxChildHeight = childHeight 

759 

760 return maxChildHeight + 1 

761 

762 def getAllLeaves(self) -> List["VDI"]: 

763 "Get all leaf nodes in the subtree rooted at self" 

764 if len(self.children) == 0: 

765 return [self] 

766 

767 leaves = [] 

768 for child in self.children: 

769 leaves.extend(child.getAllLeaves()) 

770 return leaves 

771 

    def updateBlockInfo(self) -> Optional[str]:
        """Refresh the DB_VDI_BLOCKS config entry from the image bitmap.

        Returns the base64-encoded bitmap that was computed, even when
        storing it in XAPI failed and "skipped" was recorded instead.
        """
        val = base64.b64encode(self._queryCowBlocks()).decode()
        try:
            self.setConfig(VDI.DB_VDI_BLOCKS, val)
        except Exception:
            if self.vdi_type != VdiType.QCOW2:
                raise
            # Sometime with QCOW2, our allocation table is too big to be stored in XAPI, in this case we do not store it
            # and we write `skipped` instead so that hasWork is happy (and the GC doesn't run in loop indefinitely).
            self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")

        return val

784 

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        # The new uuid must not already be registered with the SR.
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

795 

    def delete(self) -> None:
        "Physically delete the VDI"
        # release any per-VDI locks before clearing our state
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

801 

    def getParent(self) -> str:
        """Return the parent path recorded in the COW image header (stripped)."""
        return self.cowutil.getParent(self.path, lambda x: x.strip())

804 

    def repair(self, parent) -> None:
        """Repair the given *parent* image via the COW utility."""
        self.cowutil.repair(parent)

807 

808 @override 

809 def __str__(self) -> str: 

810 strHidden = "" 

811 if self.isHidden(): 811 ↛ 812line 811 didn't jump to line 812, because the condition on line 811 was never true

812 strHidden = "*" 

813 strSizeVirt = "?" 

814 if self.sizeVirt > 0: 814 ↛ 815line 814 didn't jump to line 815, because the condition on line 814 was never true

815 strSizeVirt = Util.num2str(self.sizeVirt) 

816 strSizePhys = "?" 

817 if self._sizePhys > 0: 817 ↛ 818line 817 didn't jump to line 818, because the condition on line 817 was never true

818 strSizePhys = "/%s" % Util.num2str(self._sizePhys) 

819 strSizeAllocated = "?" 

820 if self._sizeAllocated >= 0: 

821 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

822 strType = "[{}]".format(self.vdi_type) 

823 

824 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

825 strSizePhys, strSizeAllocated, strType) 

826 

    def validate(self, fast=False) -> None:
        """Check the COW image; raise util.SMException if it is corrupted."""
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)

830 

    def _clear(self):
        """Reset identifying state after the VDI has been deleted."""
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

837 

    def _clearRef(self):
        """Drop the cached XAPI ref so getRef() re-resolves it."""
        self._vdiRef = None

840 

    @staticmethod
    def _cancel_exception(sig, frame):
        """Signal handler that converts a signal into a CancelException."""
        raise CancelException()

844 

    def _call_plugin_coalesce(self, hostRef):
        """Run the on-slave commit_tapdisk plugin on *hostRef* for this VDI.

        SIGTERM is rerouted to CancelException so a termination request
        triggers the commit_cancel plugin before re-raising.
        """
        signal.signal(signal.SIGTERM, self._cancel_exception)
        args = {"path": self.path, "vdi_type": self.vdi_type}
        Util.log("Calling remote coalesce plugin with: {}".format(args))
        try:
            ret = self.sr.xapi.session.xenapi.host.call_plugin( \
                    hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
            Util.log("Remote coalesce returned {}".format(ret))
        except CancelException:
            Util.log(f"Cancelling online coalesce following signal {args}")
            self.sr.xapi.session.xenapi.host.call_plugin( \
                    hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
            raise
        except Exception:
            # no cleanup needed for other failures; propagate unchanged
            raise

860 

    def _doCoalesceOnHost(self, hostRef):
        """Coalesce this VDI remotely on *hostRef* via the on-slave plugin,
        abortably (the GC-running file acts as the cancel channel)."""
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        #TODO: We might need to make the LV RW on the slave directly for coalesce?
        # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them

        def abortTest():
            # abort when the gc-running file is emptied or removed
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        Util.log("abortTest: Cancelling coalesce")
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    Util.log("File {} does not exist".format(file))
                else:
                    Util.log("IOError: {}".format(e))
                return True
            return False

        # timeOut=0 means no timeout; SIGTERM is preferred so the child can
        # run its CancelException cleanup path
        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef), \
                None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0, prefSig=signal.SIGTERM)

        #self._verifyContents(0)
        self.parent.updateBlockInfo()

887 

    def _isOpenOnHosts(self) -> Optional[str]:
        """Return the ref of a host that has this VDI's file open, or None."""
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
                    hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None

897 

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # grow the parent first so the coalesced data is guaranteed to fit
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

911 

    def _verifyContents(self, timeOut):
        """Abortably verify (via tapdisk-diff) that self matches its parent."""
        Util.log(" Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log(" Coalesce verification succeeded")

918 

    def _runTapdiskDiff(self):
        """Run tapdisk-diff between this image and its parent.

        Returns True on success; Util.doexec raises on a non-zero exit.
        """
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

925 

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        # rate == 0: always message; rate > 0: throttle via the timestamp
        # stored in sm-config; rate < 0: never message
        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

979 

980 def coalesce(self) -> int: 

981 return self.cowutil.coalesce(self.path) 

982 

983 @staticmethod 

984 def _doCoalesceCowImage(vdi: "VDI"): 

985 try: 

986 startTime = time.time() 

987 allocated_size = vdi.getAllocatedSize() 

988 coalesced_size = vdi.coalesce() 

989 endTime = time.time() 

990 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

991 except util.CommandException as ce: 

992 # We use try/except for the following piece of code because it runs 

993 # in a separate process context and errors will not be caught and 

994 # reported by anyone. 

995 try: 

996 # Report coalesce errors back to user via XC 

997 VDI._reportCoalesceError(vdi, ce) 

998 except Exception as e: 

999 util.SMlog('failed to create XenCenter message: %s' % e) 

1000 raise ce 

1001 except: 

1002 raise 

1003 

1004 def _vdi_is_raw(self, vdi_path): 

1005 """ 

1006 Given path to vdi determine if it is raw 

1007 """ 

1008 uuid = self.extractUuid(vdi_path) 

1009 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

1010 

    def _coalesceCowImage(self, timeOut):
        """Run the COW coalesce of self into its parent as an abortable task;
        on failure (including a timeout kill), attempt a repair of the parent
        before re-raising."""
        Util.log(" Running COW coalesce on %s" % self)

        def abortTest():
            # When the coalesce may also be driven remotely, a missing or
            # empty gc_running file means we must stop.
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            # In all cases honour the SR-wide abort flag.
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            # Test hook: optionally inject a failure here.
            util.fistpoint.activate_custom_fn(
                    "cleanup_coalesceVHD_inject_failure",
                    util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                    self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # Exception at this phase could indicate a failure in COW coalesce
            # or a kill of COW coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                            'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                        'after failed coalesce on %s, err: %s' %
                        (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

1053 

1054 def _relinkSkip(self) -> None: 

1055 """Relink children of this VDI to point to the parent of this VDI""" 

1056 abortFlag = IPCFlag(self.sr.uuid) 

1057 for child in self.children: 

1058 if abortFlag.test(FLAG_TYPE_ABORT): 1058 ↛ 1059line 1058 didn't jump to line 1059, because the condition on line 1058 was never true

1059 raise AbortException("Aborting due to signal") 

1060 Util.log(" Relinking %s from %s to %s" % \ 

1061 (child, self, self.parent)) 

1062 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

1063 child._setParent(self.parent) 

1064 self.children = [] 

1065 

1066 def _reloadChildren(self, vdiSkip): 

1067 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

1068 the COW image metadata for this file in any online VDI""" 

1069 abortFlag = IPCFlag(self.sr.uuid) 

1070 for child in self.children: 

1071 if child == vdiSkip: 

1072 continue 

1073 if abortFlag.test(FLAG_TYPE_ABORT): 1073 ↛ 1074line 1073 didn't jump to line 1074, because the condition on line 1073 was never true

1074 raise AbortException("Aborting due to signal") 

1075 Util.log(" Reloading VDI %s" % child) 

1076 child._reload() 

1077 

    def _reload(self):
        """Pause & unpause to cause blktap to reload the image metadata"""
        # Depth-first: reload all descendants before this node.
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                # The VDI may have been destroyed meanwhile; only that
                # failure is tolerated.
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()

1091 

1092 def _needRelink(self, list_not_to_relink): 

1093 """ 

1094 If we coalesce up the chain, we shouldn't need to do the relink at all, we only need to do the relink on the children if their direct parent was the one we were coalescing 

1095 """ 

1096 if not list_not_to_relink: 1096 ↛ 1098line 1096 didn't jump to line 1098, because the condition on line 1096 was never false

1097 return True 

1098 if self.uuid in list_not_to_relink: 

1099 return False 

1100 else: 

1101 return True 

1102 

    def _tagChildrenForRelink(self, list_not_to_relink=None):
        """Recursively tag leaf VDIs in the subtree with the RELINKING config
        key, retrying while a VDI is being activated (up to 15 attempts)."""
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                self.uuid)
                    else:
                        if self._needRelink(list_not_to_relink):
                            self.setConfig(VDI.DB_VDI_RELINKING, "True")

                            # Re-check after tagging: activation may have
                            # started concurrently, in which case back off
                            # and retry.
                            if self.getConfig(VDI.DB_VDI_ACTIVATING):
                                self.delConfig(VDI.DB_VDI_RELINKING)
                                Util.log("VDI %s started activating while tagging" %
                                        self.uuid)
                            else:
                                return
                        else:
                            Util.log(f"Not adding relinking tag to VDI {self.uuid}")
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                # A destroyed VDI no longer needs tagging; anything else is
                # a real error.
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink(list_not_to_relink)

1134 

1135 def _loadInfoParent(self): 

1136 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1137 if ret: 

1138 self.parentUuid = ret 

1139 

    def _setParent(self, parent) -> None:
        """Re-parent this VDI onto 'parent': update the image metadata, the
        in-memory tree links and (best-effort) the vhd-parent config key."""
        self.cowutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        # Config update is best-effort: failure is only logged.
        # NOTE(review): the bare 'except' also swallows KeyboardInterrupt /
        # SystemExit -- consider narrowing to 'except Exception'.
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

1152 

    def _ensureParentActiveForRelink(self) -> None:
        # No-op in the base class; the LVM subclass overrides this to
        # activate the parent LV before children are relinked to it.
        pass

1155 

1156 def _update_vhd_parent(self, real_parent_uuid): 

1157 try: 

1158 self.setConfig(self.DB_VDI_PARENT, real_parent_uuid) 

1159 Util.log("Updated the vhd-parent field for child %s with real parent %s following a online coalesce" % \ 

1160 (self.uuid, real_parent_uuid)) 

1161 except: 

1162 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1163 (self.uuid, real_parent_uuid)) 

1164 

1165 def isHidden(self) -> bool: 

1166 if self._hidden is None: 1166 ↛ 1167line 1166 didn't jump to line 1167, because the condition on line 1166 was never true

1167 self._loadInfoHidden() 

1168 return self._hidden 

1169 

1170 def _loadInfoHidden(self) -> None: 

1171 hidden = self.cowutil.getHidden(self.path) 

1172 self._hidden = (hidden != 0) 

1173 

    def _setHidden(self, hidden=True) -> None:
        """Set/clear the hidden flag in the image metadata and cache it."""
        # Invalidate the cache first so a failure in setHidden leaves it
        # unset (forcing a reload) rather than stale.
        self._hidden = None
        self.cowutil.setHidden(self.path, hidden)
        self._hidden = hidden

1178 

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # Fast path: if the target fits within the image's in-place resize
        # limit, no pause of the subtree is needed.
        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                # Pause the entire subtree for the duration of the offline
                # resize, under the SR lock.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                # Caller already holds the atomic context.
                self._setSizeVirt(size)

        # Re-read the size actually applied by the utility.
        self.sizeVirt = self.cowutil.getSizeVirt(self.path)

1210 

1211 def _setSizeVirt(self, size) -> None: 

1212 """WARNING: do not call this method directly unless all VDIs in the 

1213 subtree are guaranteed to be unplugged (and remain so for the duration 

1214 of the operation): this operation is only safe for offline COW images""" 

1215 jFile = os.path.join(self.sr.path, self.uuid) 

1216 self.cowutil.setSizeVirt(self.path, size, jFile) 

1217 

1218 def _queryCowBlocks(self) -> bytes: 

1219 return self.cowutil.getBlockBitmap(self.path) 

1220 

1221 def _getCoalescedSizeData(self): 

1222 """Get the data size of the resulting image if we coalesce self onto 

1223 parent. We calculate the actual size by using the image block allocation 

1224 information (as opposed to just adding up the two image sizes to get an 

1225 upper bound)""" 

1226 # make sure we don't use stale BAT info from vdi_rec since the child 

1227 # was writable all this time 

1228 self.delConfig(VDI.DB_VDI_BLOCKS) 

1229 blocksChild = self.getVDIBlocks() 

1230 blocksParent = self.parent.getVDIBlocks() 

1231 numBlocks = Util.countBits(blocksChild, blocksParent) 

1232 Util.log("Num combined blocks = %d" % numBlocks) 

1233 sizeData = numBlocks * self.cowutil.getBlockSize(self.path) 

1234 assert(sizeData <= self.sizeVirt) 

1235 return sizeData 

1236 

1237 def _calcExtraSpaceForCoalescing(self) -> int: 

1238 sizeData = self._getCoalescedSizeData() 

1239 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \ 

1240 self.cowutil.calcOverheadEmpty(self.sizeVirt) 

1241 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1242 return sizeCoalesced - self.parent.getSizePhys() 

1243 

1244 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1245 """How much extra space in the SR will be required to 

1246 [live-]leaf-coalesce this VDI""" 

1247 # the space requirements are the same as for inline coalesce 

1248 return self._calcExtraSpaceForCoalescing() 

1249 

1250 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1251 """How much extra space in the SR will be required to 

1252 snapshot-coalesce this VDI""" 

1253 return self._calcExtraSpaceForCoalescing() + \ 

1254 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1255 

1256 def _getAllSubtree(self): 

1257 """Get self and all VDIs in the subtree of self as a flat list""" 

1258 vdiList = [self] 

1259 for child in self.children: 

1260 vdiList.extend(child._getAllSubtree()) 

1261 return vdiList 

1262 

1263 

class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        # The uuid is simply the file name without its image-type extension.
        fileName = os.path.basename(path)
        return os.path.splitext(fileName)[0]

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        # e.g. "<uuid>.vhd" -- the extension depends on the image type.
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        """Populate tree/size/hidden state from 'info', reading the COW
        metadata from disk when no info is supplied."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                # Leave the VDI unloaded; the caller deals with scan errors.
                Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" % \
                (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        """Rename the backing file to match the new uuid."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        # The target name must not already exist.
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI, under the SR lock."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        # -1 means "not loaded yet"; fetch lazily and cache.
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated

1329 

1330 

class LVMVDI(VDI):
    """Object representing a VDI in an LVM SR"""

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent

    @override
    def load(self, info=None) -> None:
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVMVDI.load"
        self.parent = None
        self.children = []
        # -1 == "not loaded yet" for the lazily-fetched sizes.
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)
        self.lvmcowutil = LvmCowUtil(self.cowutil)

    @override
    @staticmethod
    def extractUuid(path):
        return LvmCowUtil.extractUuid(path)

    def inflate(self, size):
        """inflate the LV containing the COW image to 'size'"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # Refresh the LV size and invalidate the cached physical sizes.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the image to minimum"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
        finally:
            self.sr.unlock()
        # Refresh the LV size and invalidate the cached physical sizes.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        # Grow the LV to the maximum size the virtual size may require.
        self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        # Only COW images carry a block bitmap; raw LVs have none.
        if VdiType.isCowImage(self.vdi_type):
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the LV and migrate activator state and refcounts to the
        new uuid."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # Carry the refcounts over to the new uuid and clear the old entry.
        ns = NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV and forget the VDI, under the SR lock."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizePhys(self) -> int:
        # Lazily loaded; -1 means "not loaded yet".
        if self._sizePhys == -1:
            self._loadInfoSizePhys()
        return self._sizePhys

    def _loadInfoSizePhys(self):
        """Get the physical utilization of the COW image file. We do it individually
        (and not using the COW batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizePhys = self.cowutil.getSizePhys(self.path)
        if self._sizePhys <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizePhys))

    @override
    def getAllocatedSize(self) -> int:
        # Lazily loaded; -1 means "not loaded yet".
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the COW volume.
        """
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        # Raw LVs store the hidden flag in LVM metadata, COW images in the
        # image header.
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        if not VdiType.isCowImage(self.vdi_type):
            # Invalidate the cache first so a failure leaves it unset.
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        # Format: [*]uuid8[type](virt/phys/alloc/LV|activity)
        strType = self.vdi_type
        if self.vdi_type == VdiType.RAW:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizePhys = ""
        if self._sizePhys > 0:
            strSizePhys = Util.num2str(self._sizePhys)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        # Only COW images have verifiable metadata.
        if VdiType.isCowImage(self.vdi_type):
            VDI.validate(self, fast)

    def _setChainRw(self) -> List[str]:
        """
        Set the readonly LV and children writable.
        It's needed because the coalesce can be done by tapdisk directly
        and it will need to write parent information for children.
        The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part.
        Return a list of the LV that were previously readonly to be made RO again after the coalesce.
        """
        was_ro = []
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)
            was_ro.append(self.fileName)

        for child in self.children:
            if child.lvReadonly:
                self.sr.lvmCache.setReadonly(child.fileName, False)
                was_ro.append(child.fileName)

        return was_ro

    def _setChainRo(self, was_ro: List[str]) -> None:
        """Set the list of LV in parameters to readonly"""
        for lvName in was_ro:
            self.sr.lvmCache.setReadonly(lvName, True)

    @override
    def _doCoalesce(self) -> None:
        """LVMVDI parents must first be activated, inflated, and made writable"""
        was_ro = []
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            was_ro = self._setChainRw()
            VDI._doCoalesce(self)
        finally:
            # Always shrink the parent back and restore readonly flags,
            # even if the coalesce failed.
            self.parent._loadInfoSizePhys()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)
            self._setChainRo(was_ro)

    @override
    def _setParent(self, parent) -> None:
        """Re-parent this VDI onto 'parent', temporarily making the LV
        writable if needed, then update tree links and config (best-effort)."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        # Config update is best-effort: failure is only logged.
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the VDI-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update the VDI-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        # Refcounted activation via the SR's LV activator.
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        # Activate this LV and every ancestor up to the root.
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _ensureParentActiveForRelink(self) -> None:
        # Children will be relinked onto the parent: its LV must be active.
        self.parent._activate()

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if VdiType.isCowImage(self.vdi_type):
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        # Zero the newly added tail; a journal entry makes the zeroing
        # restartable after a crash.
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images."""
        self._activate()
        # A resize journal LV guards against interruption mid-resize.
        jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
        try:
            self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
        finally:
            self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)

    @override
    def _queryCowBlocks(self) -> bytes:
        # The LV must be active before its bitmap can be read.
        self._activate()
        return VDI._queryCowBlocks(self)

    @override
    def getParent(self) -> str:
        self._activate()
        parent = VDI.getParent(self)
        self._deactivate()
        return parent

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0  # raw parents are never deflated in the first place
        sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        # Inline-coalesce requirement plus room for the snapshot leaf.
        return self._calcExtraSpaceForCoalescing() + \
                lvutil.calcSizeLV(self.getSizePhys())

1681 

1682 

1683class LinstorVDI(VDI): 

1684 """Object representing a VDI in a LINSTOR SR""" 

1685 

1686 VOLUME_LOCK_TIMEOUT = 30 

1687 

    @override
    def load(self, info=None) -> None:
        """Populate tree/size/hidden state from 'info' for a LINSTOR VDI."""
        # NOTE(review): `info` is dereferenced immediately below (and again
        # for LinstorCowUtil), so the `if not info:` fallback further down
        # can never run with info=None -- calling load() without info raises
        # AttributeError here first. Confirm callers always pass info.
        self.parentUuid = info.parentUuid
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)
        self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)

        if not info:
            try:
                info = self.linstorcowutil.get_info(self.uuid)
            except util.SMException:
                # Leave scanError set; caller deals with unloadable VDIs.
                Util.log(
                    ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        # -1 == "not loaded yet" for the lazily-fetched sizes.
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False

1715 

1716 @override 

1717 def getSizePhys(self, fetch=False) -> int: 

1718 if self._sizePhys < 0 or fetch: 

1719 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1720 return self._sizePhys 

1721 

1722 def getDrbdSize(self, fetch=False): 

1723 if self.drbd_size < 0 or fetch: 

1724 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1725 return self.drbd_size 

1726 

1727 @override 

1728 def getAllocatedSize(self) -> int: 

1729 if self._sizeAllocated == -1: 

1730 if VdiType.isCowImage(self.vdi_type): 

1731 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1732 return self._sizeAllocated 

1733 

    def inflate(self, size):
        """Grow the COW volume to 'size' under the SR lock; no-op for
        non-COW volumes."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # Invalidate the cached sizes; they are re-fetched on demand.
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """Shrink the COW volume back to its physical utilization under the
        SR lock; no-op for non-COW volumes."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizePhys = self.getSizePhys(fetch=True)
            self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        # Invalidate the cached sizes; they are re-fetched on demand.
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

1763 

1764 def inflateFully(self): 

1765 if VdiType.isCowImage(self.vdi_type): 

1766 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1767 

1768 @override 

1769 def rename(self, uuid) -> None: 

1770 Util.log('Renaming {} -> {} (path={})'.format( 

1771 self.uuid, uuid, self.path 

1772 )) 

1773 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1774 VDI.rename(self, uuid) 

1775 

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI, under the SR lock."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

1789 

1790 @override 

1791 def validate(self, fast=False) -> None: 

1792 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1793 raise util.SMException('COW image {} corrupted'.format(self)) 

1794 

1795 @override 

1796 def pause(self, failfast=False) -> None: 

1797 self.sr._linstor.ensure_volume_is_not_locked( 

1798 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1799 ) 

1800 return super(LinstorVDI, self).pause(failfast) 

1801 

1802 @override 

1803 def coalesce(self) -> int: 

1804 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1805 # Using another exception we can't execute the next coalesce calls. 

1806 return self.linstorcowutil.force_coalesce(self.path) 

1807 

1808 @override 

1809 def getParent(self) -> str: 

1810 return self.linstorcowutil.get_parent( 

1811 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1812 ) 

1813 

1814 @override 

1815 def repair(self, parent_uuid) -> None: 

1816 self.linstorcowutil.force_repair( 

1817 self.sr._linstor.get_device_path(parent_uuid) 

1818 ) 

1819 

    @override
    def _relinkSkip(self) -> None:
        """Relink all children of this VDI directly to its parent.

        Each child is tap-paused around the parent switch so tapdisk does
        not serve stale metadata; the loop aborts promptly when the GC
        abort flag is raised.
        """
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                # NOTE(review): unpause runs even when tap_pause itself
                # failed; presumably tap_unpause is then a no-op -- confirm.
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

1844 

    @override
    def _setParent(self, parent) -> None:
        """Rewrite this volume's COW parent link to `parent` and update the
        in-memory tree plus the sm-config hint.

        The sm-config update is best-effort only: the authoritative parent
        link is the one written into the image metadata.
        """
        # Called for its side effect: makes sure the DRBD device is
        # activated/reachable before rewriting the parent link.
        self.sr._linstor.get_device_path(self.uuid)
        self.linstorcowutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Deliberate best-effort: only log, the on-disk link is done.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

1859 

    @override
    def _doCoalesce(self) -> None:
        """Coalesce this VDI into its parent.

        The parent is validated and inflated beforehand so it can absorb
        the child's data, and always deflated afterwards -- even on
        failure -- to give unused space back.
        """
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent.deflate()

1869 

1870 def _activateChain(self): 

1871 vdi = self 

1872 while vdi: 

1873 try: 

1874 p = self.sr._linstor.get_device_path(vdi.uuid) 

1875 except Exception as e: 

1876 # Use SMException to skip coalesce. 

1877 # Otherwise the GC is stopped... 

1878 raise util.SMException(str(e)) 

1879 vdi = vdi.parent 

1880 

    @override
    def _setHidden(self, hidden=True) -> None:
        """Set or clear the hidden flag.

        For non-COW (raw) volumes the flag lives in LINSTOR volume
        metadata; COW volumes defer to the generic header-based mechanism.
        """
        HIDDEN_TAG = 'hidden'

        if not VdiType.isCowImage(self.vdi_type):
            # Invalidate the cached value first so a failed metadata
            # update cannot leave a stale cached flag behind.
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

1893 

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        """Grow the virtual size to at least `size` bytes.

        RAW volumes: resize the DRBD device and zero out the new tail,
        journaling the zeroing offset so an interrupted run can resume
        where it stopped.
        COW volumes: use the fast in-place resize when the image has
        enough metadata headroom, otherwise fall back to the full
        journaled resize (pausing the whole subtree when `atomic`).
        """
        if self.vdi_type == VdiType.RAW:
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                # A previous zeroing run was interrupted: resume from the
                # journaled offset instead of the resize boundary.
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size presumably reports MiB (hence the x1024x1024
        # conversion) -- TODO confirm against the linstorcowutil API.
        msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            # Fast path: the image has headroom for an in-place resize.
            self.linstorcowutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                # Pause the whole subtree so nothing writes while the
                # journaled resize runs.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the authoritative value rather than trusting `size`.
        self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)

1948 

    @override
    def _setSizeVirt(self, size) -> None:
        """Full (journaled) virtual-size change.

        Creates a transient, non-persistent LINSTOR volume to hold the
        resize journal, inflates the volume to the new required physical
        size, then performs the journaled resize. The journal volume is
        always destroyed afterwards (best-effort).
        """
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
        )
        try:
            self.inflate(self.linstorcowutil.compute_volume_size(size))
            self.linstorcowutil.set_size_virt(self.path, size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

1964 

    @override
    def _queryCowBlocks(self) -> bytes:
        """Return the block bitmap for this volume, as reported by
        linstorcowutil."""
        return self.linstorcowutil.get_block_bitmap(self.uuid)

1968 

1969 def _inflateParentForCoalesce(self): 

1970 if not VdiType.isCowImage(self.parent.vdi_type): 

1971 return 

1972 inc = self._calcExtraSpaceForCoalescing() 

1973 if inc > 0: 

1974 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1975 

1976 @override 

1977 def _calcExtraSpaceForCoalescing(self) -> int: 

1978 if not VdiType.isCowImage(self.parent.vdi_type): 

1979 return 0 

1980 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1981 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1982 return size_coalesced - self.parent.getDrbdSize() 

1983 

1984 @override 

1985 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1986 assert self.getDrbdSize() > 0 

1987 assert self.getSizePhys() > 0 

1988 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1989 assert deflate_diff >= 0 

1990 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1991 

1992 @override 

1993 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1994 assert self.getSizePhys() > 0 

1995 return self._calcExtraSpaceForCoalescing() + \ 

1996 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1997 

1998################################################################################ 

1999# 

2000# SR 

2001# 

2002class SR(object): 

    class LogFilter:
        """Logs the SR's VDI-tree state, showing only the trees that changed
        since the previous call to keep repeated GC output readable."""

        def __init__(self, sr):
            self.sr = sr
            self.stateLogged = False  # becomes True after the first logState()
            self.prevState = {}       # tree-root uuid -> rendered tree string
            self.currState = {}

        def logState(self):
            """Render every tree; log only new/changed ones (all, first time)."""
            changes = ""
            self.currState.clear()
            for vdi in self.sr.vdiTrees:
                self.currState[vdi.uuid] = self._getTreeStr(vdi)
                if not self.prevState.get(vdi.uuid) or \
                        self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
                    changes += self.currState[vdi.uuid]

            # Trees present last time but gone now.
            for uuid in self.prevState:
                if not self.currState.get(uuid):
                    changes += "Tree %s gone\n" % uuid

            result = "SR %s (%d VDIs in %d COW trees): " % \
                    (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))

            if len(changes) > 0:
                if self.stateLogged:
                    result += "showing only COW trees that changed:"
                result += "\n%s" % changes
            else:
                result += "no changes"

            for line in result.split("\n"):
                Util.log("%s" % line)
            # Roll current state over to previous for the next diff.
            self.prevState.clear()
            for key, val in self.currState.items():
                self.prevState[key] = val
            self.stateLogged = True

        def logNewVDI(self, uuid):
            # Only interesting after the initial full state dump.
            if self.stateLogged:
                Util.log("Found new VDI when scanning: %s" % uuid)

        def _getTreeStr(self, vdi, indent=8):
            """Recursively render a VDI tree as an indented multi-line string."""
            treeStr = "%s%s\n" % (" " * indent, vdi)
            for child in vdi.children:
                treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
            return treeStr

2049 

    # Known SR backend types (see getInstance()).
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock acquisition retry tuning (seconds / attempt counts).
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"

    # other_config keys used to negotiate offline leaf-coalesce.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"

2066 

2067 @staticmethod 

2068 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2069 xapi = XAPI(xapiSession, uuid) 

2070 type = normalizeType(xapi.srRecord["type"]) 

2071 if type == SR.TYPE_FILE: 

2072 return FileSR(uuid, xapi, createLock, force) 

2073 elif type == SR.TYPE_LVHD: 

2074 return LVMSR(uuid, xapi, createLock, force) 

2075 elif type == SR.TYPE_LINSTOR: 

2076 return LinstorSR(uuid, xapi, createLock, force) 

2077 raise util.SMException("SR type %s not recognized" % type) 

2078 

    def __init__(self, uuid, xapi, createLock, force):
        """Bind to SR `uuid` through the given XAPI wrapper.

        createLock: take the inter-process SR lock object (normal GC runs).
        force: skip the attached-here and pool-master checks.
        Raises util.SMException when the SR is not attached on this host
        (after a short grace period) or this host is not the pool master.
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}       # vdi uuid -> VDI object
        self.vdiTrees = []   # roots of the COW trees
        self.journaler = None
        self.xapi = xapi
        self._locked = 0     # nested lock() depth
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # Give a freshly plugged SR a little time to appear.
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        self.no_space_candidates = {}  # vdi uuid -> VDI lacking coalesce space

2110 

2111 def msg_cleared(self, xapi_session, msg_ref): 

2112 try: 

2113 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2114 except XenAPI.Failure: 

2115 return True 

2116 

2117 return msg is None 

2118 

    def check_no_space_candidates(self):
        """Raise or clear the 'GC lacks space' XAPI message for this SR.

        When some VDI could not be coalesced for lack of space, create a
        SM_GC_NO_SPACE message (unless a live one already exists), record
        its id in SR sm-config and on each affected VDI; when everything
        was coalescable again, remove the message and the sm-config key.
        """
        xapi_session = self.xapi.getSession()

        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # Only (re)create the message if none exists or the admin
            # dismissed the previous one.
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace any stale sm-config entry with the new message id.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.SR.remove_from_sm_config(self.xapi.srRef, VDI.DB_GC_NO_SPACE)
            xapi_session.xenapi.message.destroy(msg_id)

2146 

2147 def clear_no_space_msg(self, vdi): 

2148 msg_id = None 

2149 try: 

2150 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2151 except XenAPI.Failure: 

2152 pass 

2153 

2154 self.no_space_candidates.pop(vdi.uuid, None) 

2155 if msg_id is not None: 2155 ↛ exitline 2155 didn't return from function 'clear_no_space_msg', because the condition on line 2155 was never false

2156 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2157 

2158 

2159 def wait_for_plug(self): 

2160 for _ in range(1, 10): 

2161 time.sleep(2) 

2162 if self.xapi.isPluggedHere(): 

2163 return True 

2164 return False 

2165 

2166 def gcEnabled(self, refresh=True): 

2167 if refresh: 

2168 self.xapi.srRecord = \ 

2169 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2170 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2171 Util.log("GC is disabled for this SR, abort") 

2172 return False 

2173 return True 

2174 

    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist.

        Base implementation is a no-op; subclasses provide the real scan.
        """
        pass

2179 

2180 def scanLocked(self, force=False): 

2181 self.lock() 

2182 try: 

2183 self.scan(force) 

2184 finally: 

2185 self.unlock() 

2186 

    def getVDI(self, uuid):
        """Return the VDI object for `uuid`, or None if unknown."""
        return self.vdis.get(uuid)

2189 

2190 def hasWork(self): 

2191 if len(self.findGarbage()) > 0: 

2192 return True 

2193 if self.findCoalesceable(): 

2194 return True 

2195 if self.findLeafCoalesceable(): 

2196 return True 

2197 if self.needUpdateBlockInfo(): 

2198 return True 

2199 return False 

2200 

2201 def findCoalesceable(self): 

2202 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2203 (choosing one among all coalesceable candidates according to some 

2204 criteria) or None if there is no VDI that could be coalesced""" 

2205 

2206 candidates = [] 

2207 

2208 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2209 if srSwitch == "false": 

2210 Util.log("Coalesce disabled for this SR") 

2211 return candidates 

2212 

2213 # finish any VDI for which a relink journal entry exists first 

2214 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2215 for uuid in journals: 

2216 vdi = self.getVDI(uuid) 

2217 if vdi and vdi not in self._failedCoalesceTargets: 

2218 return vdi 

2219 

2220 for vdi in self.vdis.values(): 

2221 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2222 candidates.append(vdi) 

2223 Util.log("%s is coalescable" % vdi.uuid) 

2224 

2225 self.xapi.update_task_progress("coalescable", len(candidates)) 

2226 

2227 # pick one in the tallest tree 

2228 treeHeight = dict() 

2229 for c in candidates: 

2230 height = c.getTreeRoot().getTreeHeight() 

2231 if treeHeight.get(height): 

2232 treeHeight[height].append(c) 

2233 else: 

2234 treeHeight[height] = [c] 

2235 

2236 freeSpace = self.getFreeSpace() 

2237 heights = list(treeHeight.keys()) 

2238 heights.sort(reverse=True) 

2239 for h in heights: 

2240 for c in treeHeight[h]: 

2241 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2242 if spaceNeeded <= freeSpace: 

2243 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2244 self.clear_no_space_msg(c) 

2245 return c 

2246 else: 

2247 self.no_space_candidates[c.uuid] = c 

2248 Util.log("No space to coalesce %s (free space: %d)" % \ 

2249 (c, freeSpace)) 

2250 return None 

2251 

    def getSwitch(self, key):
        """Return the raw other_config value for `key` (None when unset)."""
        return self.xapi.srRecord["other_config"].get(key)

2254 

2255 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2256 srSwitch = self.getSwitch(switch) 

2257 ret = False 

2258 if srSwitch: 

2259 ret = srSwitch == condition 

2260 

2261 if ret: 

2262 Util.log(fail_msg) 

2263 

2264 return ret 

2265 

2266 def leafCoalesceForbidden(self): 

2267 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2268 "false", 

2269 "Coalesce disabled for this SR") or 

2270 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2271 VDI.LEAFCLSC_DISABLED, 

2272 "Leaf-coalesce disabled for this SR")) 

2273 

    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each COW tree.

        Returns the first candidate whose space cost fits in free space
        (or None). Candidates that do not fit are remembered in
        no_space_candidates; those that would fit without the snapshot
        step are flagged for offline leaf-coalesce via their config.
        """
        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # Snapshot-coalesce does not fit; a live leaf-coalesce is
                # cheaper, so consider that when the VDI qualifies for it.
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None

2310 

    def gatherLeafCoalesceable(self, candidates):
        """Append to `candidates` every VDI eligible for leaf-coalesce.

        Each guard logs its own skip reason, so the order of checks (and
        the logging it produces) is deliberate.
        """
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Leaf-coalesce must be globally enabled or forced per-VDI.
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)

2330 

    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent.

        Abort requests propagate (after cleanup); any other SMException
        only disqualifies this VDI so the GC can move on to others.
        """
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            if isinstance(e, AbortException):
                self.cleanup()
                raise
            else:
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        self.cleanup()

2348 

    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent.

        Aborts propagate; SMException/XenAPI failures only disqualify the
        VDI for this run. The leaf-coalesce config flag is cleared in all
        cases.
        """
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # Re-fetch by uuid: the original object may be stale now.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
            self.cleanup()

2372 

2373 def garbageCollect(self, dryRun=False): 

2374 vdiList = self.findGarbage() 

2375 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2376 for vdi in vdiList: 

2377 Util.log(" %s" % vdi) 

2378 if not dryRun: 

2379 self.deleteVDIs(vdiList) 

2380 self.cleanupJournals(dryRun) 

2381 

2382 def findGarbage(self): 

2383 vdiList = [] 

2384 for vdi in self.vdiTrees: 

2385 vdiList.extend(vdi.getAllPrunable()) 

2386 return vdiList 

2387 

2388 def deleteVDIs(self, vdiList) -> None: 

2389 for vdi in vdiList: 

2390 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2391 raise AbortException("Aborting due to signal") 

2392 Util.log("Deleting unlinked VDI %s" % vdi) 

2393 self.deleteVDI(vdi) 

2394 

2395 def deleteVDI(self, vdi) -> None: 

2396 assert(len(vdi.children) == 0) 

2397 del self.vdis[vdi.uuid] 

2398 if vdi.parent: 2398 ↛ 2400line 2398 didn't jump to line 2400, because the condition on line 2398 was never false

2399 vdi.parent.children.remove(vdi) 

2400 if vdi in self.vdiTrees: 2400 ↛ 2401line 2400 didn't jump to line 2401, because the condition on line 2400 was never true

2401 self.vdiTrees.remove(vdi) 

2402 vdi.delete() 

2403 

    def forgetVDI(self, vdiUuid) -> None:
        """Drop the VDI record from XAPI (the storage itself is untouched)."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)

2406 

2407 def pauseVDIs(self, vdiList) -> None: 

2408 paused = [] 

2409 failed = False 

2410 for vdi in vdiList: 

2411 try: 

2412 vdi.pause() 

2413 paused.append(vdi) 

2414 except: 

2415 Util.logException("pauseVDIs") 

2416 failed = True 

2417 break 

2418 

2419 if failed: 

2420 self.unpauseVDIs(paused) 

2421 raise util.SMException("Failed to pause VDIs") 

2422 

2423 def unpauseVDIs(self, vdiList): 

2424 failed = False 

2425 for vdi in vdiList: 

2426 try: 

2427 vdi.unpause() 

2428 except: 

2429 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2430 failed = True 

2431 if failed: 

2432 raise util.SMException("Failed to unpause VDIs") 

2433 

    def getFreeSpace(self) -> int:
        """Free space in bytes; base implementation reports none --
        subclasses override with the backend's real figure."""
        return 0

2436 

    def cleanup(self):
        """Hook for subclasses to release temporary resources; base no-op."""
        Util.log("In cleanup")
        return

2440 

2441 @override 

2442 def __str__(self) -> str: 

2443 if self.name: 

2444 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2445 else: 

2446 ret = "%s" % self.uuid 

2447 return ret 

2448 

    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        # No-op when locking was disabled at construction time.
        if not self._srLock:
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # Retry with a delay instead of blocking, so we can notice an
            # abort request while waiting for the lock holder.
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # Already held by us: just bump the nesting depth.
        self._locked += 1

2468 

2469 def unlock(self): 

2470 if not self._srLock: 2470 ↛ 2472line 2470 didn't jump to line 2472, because the condition on line 2470 was never false

2471 return 

2472 assert(self._locked > 0) 

2473 self._locked -= 1 

2474 if self._locked == 0: 

2475 self._srLock.release() 

2476 

2477 def needUpdateBlockInfo(self) -> bool: 

2478 for vdi in self.vdis.values(): 

2479 if vdi.scanError or len(vdi.children) == 0: 

2480 continue 

2481 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2482 return True 

2483 return False 

2484 

2485 def updateBlockInfo(self) -> None: 

2486 for vdi in self.vdis.values(): 

2487 if vdi.scanError or len(vdi.children) == 0: 

2488 continue 

2489 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2490 vdi.updateBlockInfo() 

2491 

2492 def cleanupCoalesceJournals(self): 

2493 """Remove stale coalesce VDI indicators""" 

2494 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2495 for uuid, jval in entries.items(): 

2496 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2497 

    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    continue
                if t == SR.JRN_CLONE:
                    # A clone journal value is "<base>_<clone>": keep the
                    # entry while the base VDI still exists.
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log(" Deleting stale '%s' journal entry for %s "
                        "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)

2513 

    def cleanupCache(self, maxAge=-1) -> int:
        """Subclass hook for purging cached images; base implementation
        removes nothing and returns 0."""
        return 0

2516 

2517 def _hasLeavesAttachedOn(self, vdi: VDI): 

2518 leaves = vdi.getAllLeaves() 

2519 leaves_vdi = [leaf.uuid for leaf in leaves] 

2520 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2521 

2522 def _gc_running_file(self, vdi: VDI): 

2523 run_file = "gc_running_{}".format(vdi.uuid) 

2524 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2525 

2526 def _create_running_file(self, vdi: VDI): 

2527 with open(self._gc_running_file(vdi), "w") as f: 

2528 f.write("1") 

2529 

2530 def _delete_running_file(self, vdi: VDI): 

2531 os.unlink(self._gc_running_file(vdi)) 

2532 

    def _coalesce(self, vdi: VDI):
        """Coalesce vdi into its parent, then relink vdi's children.

        Crash-safety uses two journal entries: JRN_COALESCE marks the
        data-copy phase (so abort requests can target it) and JRN_RELINK
        marks the relink phase (so a crashed run can resume relinking
        without redoing the copy).
        """
        list_not_to_relink = None
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")

            # The parent volume must be active for the parent change to occur.
            # The parent volume may become inactive if the host is rebooted.
            vdi._ensureParentActiveForRelink()
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the COW coalesce step is done, because we
            # don't expect the rest of the process to take long

            if os.path.exists(self._gc_running_file(vdi)):
                util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))

            self._create_running_file(vdi)

            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            host_refs = self._hasLeavesAttachedOn(vdi)
            #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
            if len(host_refs) > 1:
                Util.log("Not coalesceable, chain activated more than once")
                raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error

            try:
                if host_refs and vdi.cowutil.isCoalesceableOnRemote():
                    #Leaf opened on another host, we need to call online coalesce
                    Util.log("Remote coalesce for {}".format(vdi.path))
                    vdi._doCoalesceOnHost(list(host_refs)[0])
                    # If we use a host OpaqueRef to do a online coalesce, this vdi will not need to be relinked since it was done by tapdisk
                    # If we coalesce up the chain, we shouldn't need to do the relink at all, we only need to do the relink on the children if their direct parent was the one we were coalescing
                    for child in vdi.children:
                        real_parent_uuid = child.extractUuid(child.getParent())
                        if real_parent_uuid == vdi.parent.uuid:
                            child._update_vhd_parent(real_parent_uuid)  # We update the sm-config:vhd-parent value for this VDI since it has already been relinked
                            list_not_to_relink = [leaf.uuid for leaf in child.getAllLeaves()]
                else:
                    Util.log("Offline coalesce for {}".format(vdi.path))
                    vdi._doCoalesce()
            except Exception as e:
                Util.log("EXCEPTION while coalescing: {}".format(e))
                # Drop the marker so a later run is not blocked by it.
                self._delete_running_file(vdi)
                raise

            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
            self._delete_running_file(vdi)

        util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

        # we now need to relink the children: lock the SR to prevent ops
        # like SM.clone from manipulating the VDIs we'll be relinking and
        # rescan the SR first in case the children changed since the last
        # scan
        self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink(list_not_to_relink)
            self.scan()
            vdi._relinkSkip()
        finally:
            self.unlock()
            # Reload the children to leave things consistent
            vdi.parent._reloadChildren(vdi)
        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)

        self.deleteVDI(vdi)

2604 

    class CoalesceTracker:
        """Watches successive leaf-coalesce iterations and decides when to
        give up (too many iterations, no progress, or size regressions)."""

        GRACE_ITERATIONS = 2              # tolerated size bumps above minimum
        MAX_ITERATIONS_NO_PROGRESS = 3
        MAX_ITERATIONS = 20
        MAX_INCREASE_FROM_MINIMUM = 1.2   # tolerated growth over the best size
        HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
                " --> Final size {finSize}"

        def __init__(self, sr):
            self.itsNoProgress = 0
            self.its = 0
            self.minSize = float("inf")
            self._history = []    # one {'finalsize', 'msg'} dict per iteration
            self.reason = ""
            self.startSize = None
            self.finishSize = None
            self.sr = sr
            self.grace_remaining = self.GRACE_ITERATIONS

        @property
        def history(self):
            # Human-readable per-iteration messages only.
            return [x['msg'] for x in self._history]

        def moving_average(self):
            """
            Calculate a three point moving average

            NOTE(review): despite the name, this averages over the WHOLE
            recorded history, not just the last three entries -- confirm
            whether that is intended.
            """
            assert len(self._history) >= 3

            mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history)
            util.SMlog(f'Calculated moving average as {mv_average}')
            return mv_average

        def abortCoalesce(self, prevSize, curSize):
            """Record one iteration; return True when coalescing should stop."""
            self.its += 1
            self._history.append(
                {
                    'finalsize': curSize,
                    'msg': self.HISTORY_STRING.format(its=self.its,
                                                      initSize=prevSize,
                                                      finSize=curSize)
                }
            )

            self.finishSize = curSize

            if self.startSize is None:
                self.startSize = prevSize

            if curSize < self.minSize:
                self.minSize = curSize

            if prevSize < self.minSize:
                self.minSize = prevSize

            if self.its < 4:
                # Perform at least three iterations
                return False

            if prevSize >= curSize or curSize < self.moving_average():
                # We made progress
                return False
            else:
                self.itsNoProgress += 1
                Util.log("No progress, attempt:"
                         " {attempt}".format(attempt=self.itsNoProgress))
                util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)

            if self.its > self.MAX_ITERATIONS:
                max = self.MAX_ITERATIONS
                self.reason = \
                    "Max iterations ({max}) exceeded".format(max=max)
                return True

            if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
                max = self.MAX_ITERATIONS_NO_PROGRESS
                self.reason = \
                    "No progress made for {max} iterations".format(max=max)
                return True

            maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
            if curSize > maxSizeFromMin:
                # Allow a couple of bumps over the minimum before giving up.
                self.grace_remaining -= 1
                if self.grace_remaining == 0:
                    self.reason = "Unexpected bump in size," \
                            " compared to minimum achieved"

                    return True

            return False

        def printSizes(self):
            """Log the start/final/minimum sizes recorded so far."""
            Util.log("Starting size was {size}"
                     .format(size=self.startSize))
            Util.log("Final size was {size}"
                     .format(size=self.finishSize))
            Util.log("Minimum size achieved was {size}"
                     .format(size=self.minSize))

        def printReasoning(self):
            """Log the full iteration history plus the abort reason."""
            Util.log("Aborted coalesce")
            for hist in self.history:
                Util.log(hist)
            Util.log(self.reason)
            self.printSizes()

        def printSummary(self):
            """Log one summary block after the loop ends (no-op if unused)."""
            if self.its == 0:
                return

            if self.reason:
                Util.log("Aborted coalesce")
                Util.log(self.reason)
            else:
                Util.log("Coalesce summary")

            Util.log(f"Performed {self.its} iterations")
            self.printSizes()

2724 

    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # Keep snapshot-coalescing until the leaf is judged small enough
        # (relative to measured storage speed) to be coalesced live with
        # bounded downtime.
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizePhys = vdi.getSizePhys()
            if not self._snapshotCoalesce(vdi):
                # VDI deleted or snapshotted concurrently; caller may retry.
                return False
            if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
                # No progress is being made; give up loudly.
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)

2740 

2741 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2742 speed = None 

2743 total_time = endTime - startTime 

2744 if total_time > 0: 

2745 speed = float(coalescedSize) / float(total_time) 

2746 return speed 

2747 

2748 def writeSpeedToFile(self, speed): 

2749 content = [] 

2750 speedFile = None 

2751 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2752 self.lock() 

2753 try: 

2754 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2755 lines = "" 

2756 if not os.path.isfile(path): 

2757 lines = str(speed) + "\n" 

2758 else: 

2759 speedFile = open(path, "r+") 

2760 content = speedFile.readlines() 

2761 content.append(str(speed) + "\n") 

2762 if len(content) > N_RUNNING_AVERAGE: 

2763 del content[0] 

2764 lines = "".join(content) 

2765 

2766 util.atomicFileWrite(path, VAR_RUN, lines) 

2767 finally: 

2768 if speedFile is not None: 

2769 speedFile.close() 

2770 Util.log("Closing file: {myfile}".format(myfile=path)) 

2771 self.unlock() 

2772 

2773 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2774 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2775 if speed is None: 

2776 return 

2777 

2778 self.writeSpeedToFile(speed) 

2779 

2780 def getStorageSpeed(self): 

2781 speedFile = None 

2782 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2783 self.lock() 

2784 try: 

2785 speed = None 

2786 if os.path.isfile(path): 

2787 speedFile = open(path) 

2788 content = speedFile.readlines() 

2789 try: 

2790 content = [float(i) for i in content] 

2791 except ValueError: 

2792 Util.log("Something bad in the speed log:{log}". 

2793 format(log=speedFile.readlines())) 

2794 return speed 

2795 

2796 if len(content): 

2797 speed = sum(content) / float(len(content)) 

2798 if speed <= 0: 2798 ↛ 2800line 2798 didn't jump to line 2800, because the condition on line 2798 was never true

2799 # Defensive, should be impossible. 

2800 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2801 format(speed=speed, uuid=self.uuid)) 

2802 speed = None 

2803 else: 

2804 Util.log("Speed file empty for SR: {uuid}". 

2805 format(uuid=self.uuid)) 

2806 else: 

2807 Util.log("Speed log missing for SR: {uuid}". 

2808 format(uuid=self.uuid)) 

2809 return speed 

2810 finally: 

2811 if not (speedFile is None): 

2812 speedFile.close() 

2813 self.unlock() 

2814 

2815 def _snapshotCoalesce(self, vdi): 

2816 # Note that because we are not holding any locks here, concurrent SM 

2817 # operations may change this tree under our feet. In particular, vdi 

2818 # can be deleted, or it can be snapshotted. 

2819 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2820 Util.log("Single-snapshotting %s" % vdi) 

2821 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2822 try: 

2823 ret = self.xapi.singleSnapshotVDI(vdi) 

2824 Util.log("Single-snapshot returned: %s" % ret) 

2825 except XenAPI.Failure as e: 

2826 if util.isInvalidVDI(e): 

2827 Util.log("The VDI appears to have been concurrently deleted") 

2828 return False 

2829 raise 

2830 self.scanLocked() 

2831 tempSnap = vdi.parent 

2832 if not tempSnap.isCoalesceable(): 

2833 Util.log("The VDI appears to have been concurrently snapshotted") 

2834 return False 

2835 Util.log("Coalescing parent %s" % tempSnap) 

2836 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2837 sizePhys = vdi.getSizePhys() 

2838 self._coalesce(tempSnap) 

2839 if not vdi.isLeafCoalesceable(): 

2840 Util.log("The VDI tree appears to have been altered since") 

2841 return False 

2842 return True 

2843 

    def _liveLeafCoalesce(self, vdi: VDI) -> bool:
        """Coalesce a leaf VDI onto its parent while it may be attached.

        Re-checks preconditions under the SR lock, pauses the VDI, performs
        the coalesce via _doCoalesceLeaf, then unpauses and deletes the
        obsolete renamed leaf. Returns False if the VDI disappeared or
        stopped being leaf-coalesceable before we could pause it.
        """
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._create_running_file(vdi)
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    # Revert or finish the half-done operation before
                    # re-raising, so the tree is left consistent.
                    self._handleInterruptedCoalesceLeaf()
                    raise
            finally:
                # _doCoalesceLeaf renames nodes, so re-resolve the VDI by
                # uuid before unpausing.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.ensureUnpaused()
                self._delete_running_file(vdi)
                # Delete the obsolete old leaf (renamed with the TMP prefix)
                # if it still exists.
                vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                if vdiOld:
                    util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                    self.deleteVDI(vdiOld)
                    util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
        finally:
            self.cleanup()
            self.unlock()
            self.logFilter.logState()
        return True

2882 

    def _doCoalesceLeaf(self, vdi: VDI):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # Journal first so an interrupted run can be undone/finished by
        # _handleInterruptedCoalesceLeaf on restart.
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceCowImage(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename: swap the identities so the parent takes over the child's
        # uuid and the child is parked under a temporary name
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        if vdi.parent.vdi_type == VdiType.RAW:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
            vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        if parent.parent is None:
            # The survivor is now a root: drop its stale parent pointer.
            parent.delConfig(VDI.DB_VDI_PARENT)

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)

2948 

2949 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2950 assert(VdiType.isCowImage(parent.vdi_type)) 

2951 extra = child.getSizePhys() - parent.getSizePhys() 

2952 if extra < 0: 2952 ↛ 2953line 2952 didn't jump to line 2953, because the condition on line 2952 was never true

2953 extra = 0 

2954 return extra 

2955 

    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Subclass hook: prepare a leaf VDI for coalescing. No-op here."""
        pass

2958 

    def _updateNode(self, vdi) -> None:
        """Subclass hook: fix up per-node state (e.g. refcounts) after the
        leaf-coalesce rename. No-op here."""
        pass

2961 

    def _finishCoalesceLeaf(self, parent) -> None:
        """Subclass hook: final adjustment of the surviving node. No-op here."""
        pass

2964 

    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """Subclass hook: refresh slave hosts after a leaf-coalesce undo.
        No-op here."""
        pass

2967 

    def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
        """Subclass hook: propagate a VDI rename to slave hosts. No-op here."""
        pass

2970 

    def _updateSlavesOnResize(self, vdi) -> None:
        """Subclass hook: propagate a VDI resize to slave hosts. No-op here."""
        pass

2973 

2974 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2975 for uuid in list(self.vdis.keys()): 

2976 if not uuid in uuidsPresent: 

2977 Util.log("VDI %s disappeared since last scan" % \ 

2978 self.vdis[uuid]) 

2979 del self.vdis[uuid] 

2980 

    def _handleInterruptedCoalesceLeaf(self) -> None:
        """An interrupted leaf-coalesce operation may leave the COW tree in an
        inconsistent state. If the old-leaf VDI is still present, we revert the
        operation (in case the original error is persistent); otherwise we must
        finish the operation.

        Subclass hook: no-op in the base SR.
        """
        pass

2987 

    def _buildTree(self, force):
        """Link the scanned VDI objects into parent/child trees.

        Roots are collected in self.vdiTrees. A node whose parent is missing
        is treated as a root if it carries the temporary-rename prefix
        (mid-coalesce leftovers); otherwise a missing parent is logged and
        tolerated when *force* is set, or raised as an error when it is not.
        """
        self.vdiTrees = []
        for vdi in self.vdis.values():
            if vdi.parentUuid:
                parent = self.getVDI(vdi.parentUuid)
                if not parent:
                    if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
                        self.vdiTrees.append(vdi)
                        continue
                    if force:
                        Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
                                (vdi.parentUuid, vdi.uuid))
                        self.vdiTrees.append(vdi)
                        continue
                    else:
                        raise util.SMException("Parent VDI %s of %s not " \
                                "found" % (vdi.parentUuid, vdi.uuid))
                vdi.parent = parent
                parent.children.append(vdi)
            else:
                self.vdiTrees.append(vdi)

3009 

3010 

class FileSR(SR):
    """SR backed by a plain filesystem directory: each VDI is an image file
    under /var/run/sr-mount/<sr-uuid>."""

    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = "/var/run/sr-mount/%s" % self.uuid
        # File-based journal kept alongside the images.
        self.journaler = fjournaler.Journaler(self.path)

    @override
    def scan(self, force=False) -> None:
        """Rescan the SR directory and rebuild the in-memory VDI forest."""
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)

        uuidsPresent: List[str] = []

        # One scan pass per supported COW image type.
        for vdi_type in VDI_COW_TYPES:
            scan_result = self._scan(vdi_type, force)
            for uuid, image_info in scan_result.items():
                vdi = self.getVDI(uuid)
                if not vdi:
                    self.logFilter.logNewVDI(uuid)
                    vdi = FileVDI(self, uuid, vdi_type)
                    self.vdis[uuid] = vdi
                vdi.load(image_info)
            uuidsPresent.extend(scan_result.keys())

        # Raw images are listed directly; they carry no COW metadata to load.
        rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, VdiType.RAW)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def getFreeSpace(self) -> int:
        """Free bytes on the filesystem backing this SR."""
        return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)

    @override
    def deleteVDIs(self, vdiList) -> None:
        """Delete the given VDIs; if any root node went away on an NFS SR,
        mark remote IntelliCache SRs dirty so their caches get refreshed."""
        rootDeleted = False
        for vdi in vdiList:
            if not vdi.parent:
                rootDeleted = True
                break
        SR.deleteVDIs(self, vdiList)
        if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
            self.xapi.markCacheSRsDirty()

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            # Decide what to do with this cache based on the VDI record.
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # Lock on the parent's uuid when there is one: leaf caches share
            # setup locking with their parent's cache.
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

    def _cleanupCache(self, uuid, action):
        """Remove one cache file (shutting down its tapdisk if allowed).
        Returns True if the file was deleted."""
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            # Re-check under the lock: caching may have been re-enabled.
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

    def _isCacheFileName(self, name):
        """True if *name* looks like "<uuid><CACHE_FILE_EXT>"."""
        return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
                name.endswith(self.CACHE_FILE_EXT)

    def _scan(self, vdi_type, force):
        """Scan the directory for images of *vdi_type*, retrying on per-image
        errors; with force=True return the (possibly erroneous) last result
        instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
            scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
            for uuid, vdiInfo in scan_result.items():
                if vdiInfo.error:
                    error = True
                    break
            if not error:
                return scan_result
            Util.log("Scan error on attempt %d" % i)
        if force:
            return scan_result
        raise util.SMException("Scan error")

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no slave host still has it open."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    def _checkSlaves(self, vdi):
        """Ask every host with an attached PBD (except this one) to confirm
        the VDI file is not in use; failures from offline hosts are ignored."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

    def _checkSlave(self, hostRef, vdi):
        """Invoke the nfs-on-slave plugin's "check" on one host for vdi.path.
        Raises XenAPI.Failure if the slave reports the file busy."""
        call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
        Util.log("Checking with slave: %s" % repr(call))
        _host = self.xapi.session.xenapi.host
        # NOTE(review): the return value is unused; kept for debuggability.
        text = _host.call_plugin( * call)

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Replay the leaf-coalesce journal: undo the operation if the parent
        (or temporarily-renamed child) file still exists, finish it otherwise."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            # NOTE(review): childName is computed but unused; kept as-is.
            childName = uuid + VdiTypeExtension.VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
            parentName1 = parentUuid + VdiTypeExtension.VHD
            parentName2 = parentUuid + VdiTypeExtension.RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: rename parent and child back to
        their original names and restore the hidden flags and VDI record."""
        Util.log("*** UNDO LEAF-COALESCE")
        # The parent may already have been renamed to the child's uuid.
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        # The child may be parked under the temporary-rename prefix.
        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # Restore visibility: child is the live leaf, parent stays hidden.
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data work already finished: forget
        the old parent record and refresh slaves (best effort on the forget)."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
            raise util.SMException("VDI %s not found" % childUuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # The record may already be gone; log and carry on.
            Util.logException('_finishInterruptedCoalesceLeaf')
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3255 

3256 

3257class LVMSR(SR): 

3258 TYPE = SR.TYPE_LVHD 

3259 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3260 

    def __init__(self, uuid, xapi, createLock, force):
        """Initialize an LVM-backed SR: volume-group paths, LVM cache,
        LV activator and LVM journaler."""
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
        self.path = os.path.join(VG_LOCATION, self.vgName)

        # Optional per-SR LVM configuration stored in the SR's other-config.
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

3273 

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI, first deactivating its LV locally (if we activated
        it) and verifying no slave still has it active."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

3280 

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in xapi and also remove its entry from the SR's
        LVM metadata volume."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

3286 

3287 @override 

3288 def getFreeSpace(self) -> int: 

3289 stats = lvutil._getVGstats(self.vgName) 

3290 return stats['physical_size'] - stats['physical_utilisation'] 

3291 

    @override
    def cleanup(self):
        """Deactivate all LVs activated during this GC pass (best effort:
        failures are logged, not raised)."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

3296 

3297 @override 

3298 def needUpdateBlockInfo(self) -> bool: 

3299 for vdi in self.vdis.values(): 

3300 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3301 continue 

3302 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3303 return True 

3304 return False 

3305 

    @override
    def updateBlockInfo(self) -> None:
        """Compute and cache block-allocation info for every cleanly-scanned,
        non-leaf COW VDI that does not have it yet."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

3321 

    @override
    def scan(self, force=False) -> None:
        """Rescan the volume group and rebuild the in-memory VDI forest."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        # Recover any leaf-coalesce that was interrupted by a crash.
        self._handleInterruptedCoalesceLeaf()

3336 

    def _scan(self, force):
        """Collect per-VDI info from LVM, retrying on per-VDI scan errors.

        With force=True the last (possibly erroneous) result is returned
        after exhausting retries instead of raising.
        """
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

3352 

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Drop records for VDIs gone since the last scan, also releasing
        any LV activation we were tracking for them."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

3362 

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

3374 

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the LV chain and rearrange space: make the parent
        writable, deflate the child and inflate the parent for the coalesce."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

3381 

    @override
    def _updateNode(self, vdi) -> None:
        """Transfer refcounts from the coalesced leaf to the surviving node."""
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

3396 

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Right-size the surviving node: keep it fully inflated when it may
        be written to, otherwise deflate to reclaim space."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

3403 

3404 @override 

3405 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3406 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3407 

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Replay the leaf-coalesce journal: undo the operation if the
        parent LV (or the temporarily-renamed child LV) still exists,
        otherwise finish it."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            undo = False
            # Check for the parent LV under every known type prefix.
            for prefix in LV_PREFIX.values():
                parentLV = prefix + parentUuid
                undo = self.lvmCache.checkLV(parentLV)
                if undo:
                    break

            if not undo:
                # Or for the child parked under the temporary-rename prefix.
                for prefix in LV_PREFIX.values():
                    tmpChildLV = prefix + uuid
                    undo = self.lvmCache.checkLV(tmpChildLV)
                    if undo:
                        break

            if undo:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

3434 

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done LVM leaf-coalesce: rename LVs back, restore
        refcounts, re-inflate the child and restore hidden/readonly flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        # The parent LV may already have been renamed to the child's uuid.
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        # The child may be parked under the temporary-rename prefix.
        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

            # refcount (best effort - assume that it had succeeded if the
            # second rename succeeded; if not, this adjustment will be wrong,
            # leading to a non-deactivation of the LV)
            ns = NS_PREFIX_LVM + self.uuid
            cCnt, cBcnt = RefCounter.check(child.uuid, ns)
            pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
            pCnt = pCnt + cBcnt
            RefCounter.set(parent.uuid, pCnt, 0, ns)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        # Restore visibility and readonly state: child is the live leaf.
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

3484 

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data work already finished:
        re-inflate the survivor, forget the old parent record (best effort)
        and refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # The record may already be gone; ignore.
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3499 

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        # Plugin arguments: deactivate the LV, then clean up its lock and
        # refcount state in the SR's LVM namespace.
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                # Only fatal when the host is supposed to be online.
                if hostRef in onlineHosts:
                    raise

3526 

3527 @override 

3528 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3529 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3530 if not slaves: 

3531 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3532 child) 

3533 return 

3534 

3535 tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid 

3536 args = {"vgName": self.vgName, 

3537 "action1": "deactivateNoRefcount", 

3538 "lvName1": tmpName, 

3539 "action2": "deactivateNoRefcount", 

3540 "lvName2": child.fileName, 

3541 "action3": "refresh", 

3542 "lvName3": child.fileName, 

3543 "action4": "refresh", 

3544 "lvName4": parent.fileName} 

3545 for slave in slaves: 

3546 Util.log("Updating %s, %s, %s on slave %s" % \ 

3547 (tmpName, child.fileName, parent.fileName, 

3548 self.xapi.getRecordHost(slave)['hostname'])) 

3549 text = self.xapi.session.xenapi.host.call_plugin( \ 

3550 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3551 Util.log("call-plugin returned: '%s'" % text) 

3552 

3553 @override 

3554 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3555 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3556 if not slaves: 

3557 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3558 return 

3559 

3560 args = {"vgName": self.vgName, 

3561 "action1": "deactivateNoRefcount", 

3562 "lvName1": oldNameLV, 

3563 "action2": "refresh", 

3564 "lvName2": vdi.fileName, 

3565 "action3": "cleanupLockAndRefcount", 

3566 "uuid3": origParentUuid, 

3567 "ns3": NS_PREFIX_LVM + self.uuid} 

3568 for slave in slaves: 

3569 Util.log("Updating %s to %s on slave %s" % \ 

3570 (oldNameLV, vdi.fileName, 

3571 self.xapi.getRecordHost(slave)['hostname'])) 

3572 text = self.xapi.session.xenapi.host.call_plugin( \ 

3573 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3574 Util.log("call-plugin returned: '%s'" % text) 

3575 

3576 @override 

3577 def _updateSlavesOnResize(self, vdi) -> None: 

3578 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3579 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3580 if not slaves: 

3581 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3582 return 

3583 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3584 vdi.fileName, vdi.uuid, slaves) 

3585 

3586 

class LinstorSR(SR):
    """SR implementation backed by LINSTOR/DRBD volumes.

    Volume management is delegated to a `LinstorVolumeManager` (built lazily
    by `_reloadLinstor`); journaling uses `LinstorJournaler` instead of the
    LVM journaler.
    """

    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        # LINSTOR support is optional at packaging level: fail early when the
        # python-linstor libraries could not be imported.
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH

        class LinstorProxy:
            # Forwards attribute access to `sr._linstor` so the volume
            # manager can be (re)created after this proxy has been handed out.
            def __init__(self, sr: LinstorSR) -> None:
                self.sr = sr

            def __getattr__(self, attr: str) -> Any:
                assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
                return getattr(self.sr._linstor, attr)

        self._linstor_proxy = LinstorProxy(self)
        # Only the journaler is needed up-front; the full volume manager is
        # created on demand by `_scan`.
        self._reloadLinstor(journaler_only=True)

    @override
    def deleteVDI(self, vdi) -> None:
        # Refuse deletion while any non-tapdisk process has the volume open.
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def getFreeSpace(self) -> int:
        return self._linstor.max_volume_size_allowed

    @override
    def scan(self, force=False) -> None:
        all_vdi_info = self._scan(force)
        for uuid, vdiInfo in all_vdi_info.items():
            # When vdiInfo is None, the VDI is RAW.
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)
                self.vdis[uuid] = vdi
            if vdiInfo:
                vdi.load(vdiInfo)
        self._removeStaleVDIs(all_vdi_info.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def pauseVDIs(self, vdiList) -> None:
        # Wait for any LINSTOR-side volume locks before pausing tapdisks.
        self._linstor.ensure_volume_list_is_not_locked(
            vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorSR, self).pauseVDIs(vdiList)

    def _reloadLinstor(self, journaler_only=False):
        """(Re)connect to the LINSTOR controller; always rebuilds the
        journaler and, unless journaler_only, the volume manager too."""
        session = self.xapi.session
        host_ref = util.get_this_host_ref(session)
        sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)

        pbd = util.find_my_pbd(session, host_ref, sr_ref)
        if pbd is None:
            raise util.SMException('Failed to find PBD')

        dconf = session.xenapi.PBD.get_device_config(pbd)
        group_name = dconf['group-name']

        controller_uri = get_controller_uri()
        self.journaler = LinstorJournaler(
            controller_uri, group_name, logger=util.SMlog
        )

        if journaler_only:
            return

        self._linstor = LinstorVolumeManager(
            controller_uri,
            group_name,
            repair=True,
            logger=util.SMlog
        )

    def _scan(self, force):
        """Load VDI info with retries; with force=True return the last
        (possibly erroneous) result instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            self._reloadLinstor()
            error = False
            try:
                all_vdi_info = self._load_vdi_info()
                for uuid, vdiInfo in all_vdi_info.items():
                    if vdiInfo and vdiInfo.error:
                        error = True
                        break
                if not error:
                    return all_vdi_info
                Util.log('Scan error, retrying ({})'.format(i))
            except Exception as e:
                Util.log('Scan exception, retrying ({}): {}'.format(i, e))
                Util.log(traceback.format_exc())

        if force:
            return all_vdi_info
        raise util.SMException('Scan error')

    def _load_vdi_info(self):
        """Build {vdi_uuid: CowImageInfo | None} for all LINSTOR volumes.

        RAW volumes map to None; COW volumes get their image info loaded in
        parallel via MultiLinstorCowUtil. Volumes named 'DELETED_*' are
        destroyed on the spot and excluded from the result.
        """
        all_volume_info = self._linstor.get_volumes_with_info()
        volumes_metadata = self._linstor.get_volumes_with_metadata()

        all_vdi_info = {}
        pending_vdis = []

        def handle_fail(vdi_uuid, e):
            # Record the failure as an errored info object so the scan retry
            # logic in _scan can detect it.
            Util.log(f" [VDI {vdi_uuid}: failed to load VDI info]: {e}")
            info = CowImageInfo(vdi_uuid)
            info.error = 1
            return info

        for vdi_uuid, volume_info in all_volume_info.items():
            vdi_type = VdiType.RAW
            try:
                volume_metadata = volumes_metadata[vdi_uuid]
                if not volume_info.name and not list(volume_metadata.items()):
                    continue  # Ignore it, probably deleted.

                if vdi_uuid.startswith('DELETED_'):
                    # Assume it's really a RAW volume of a failed snap without COW header/footer.
                    # We must remove this VDI now without adding it in the VDI list.
                    # Otherwise `Relinking` calls and other actions can be launched on it.
                    # We don't want that...
                    Util.log('Deleting bad VDI {}'.format(vdi_uuid))

                    self.lock()
                    try:
                        self._linstor.destroy_volume(vdi_uuid)
                        try:
                            self.forgetVDI(vdi_uuid)
                        except:
                            pass
                    except Exception as e:
                        Util.log('Cannot delete bad VDI: {}'.format(e))
                    finally:
                        self.unlock()
                    continue

                vdi_type = volume_metadata.get(VDI_TYPE_TAG)
                if VdiType.isCowImage(vdi_type):
                    pending_vdis.append((vdi_uuid, vdi_type))
                else:
                    all_vdi_info[vdi_uuid] = None
            except Exception as e:
                all_vdi_info[vdi_uuid] = handle_fail(vdi_uuid, e)

        multi_cowutil = MultiLinstorCowUtil(self._linstor.uri, self._linstor.group_name)

        def load_info(vdi, multi_cowutil):
            # Worker: load image info for one (uuid, type) pair; failures are
            # converted to errored info objects rather than raised.
            vdi_uuid, vdi_type = vdi
            try:
                vdiInfo = multi_cowutil.get_local_cowutil(vdi_type).get_info(vdi_uuid)
            except Exception as e:
                vdiInfo = handle_fail(vdi_uuid, e)
            vdiInfo.vdiType = vdi_type
            return vdiInfo

        try:
            for vdiInfo in multi_cowutil.run(load_info, pending_vdis):
                all_vdi_info[vdiInfo.uuid] = vdiInfo
        finally:
            del multi_cowutil

        return all_vdi_info

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        vdi._activateChain()
        vdi.deflate()
        vdi._inflateParentForCoalesce()

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        # Keep the parent fully inflated when it is live (attached RW) or not
        # a snapshot; otherwise shrink it back.
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        return LinstorCowUtil(
            self.xapi.session, self._linstor, parent.vdi_type
        ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()

    def _hasValidDevicePath(self, uuid):
        # True iff LINSTOR can resolve a device path for this volume.
        try:
            self._linstor.get_device_path(uuid)
        except Exception:
            # TODO: Maybe log exception.
            return False
        return True

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        self.lock()
        try:
            self._linstor.ensure_volume_is_not_locked(
                vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
            )
            return super(LinstorSR, self)._liveLeafCoalesce(vdi)
        finally:
            self.unlock()

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Replay leaf-coalesce journal entries: undo when the parent (or the
        temporarily renamed child) still exists, otherwise finish."""
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back an interrupted leaf-coalesce by restoring the original
        parent/child names, config, and hidden flags."""
        Util.log('*** UNDO LEAF-COALESCE')
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)

        # TODO: Maybe deflate here.

        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete an interrupted leaf-coalesce: forget the stale parent and
        refresh the child on slaves."""
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

    def _checkSlaves(self, vdi):
        """Raise if any process other than tapdisk has the volume open on any
        host; a non-existent volume is treated as not in use."""
        try:
            all_openers = self._linstor.get_volume_openers(vdi.uuid)
            for openers in all_openers.values():
                for opener in openers.values():
                    if opener['process-name'] != 'tapdisk':
                        raise util.SMException(
                            'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
                        )
        except LinstorVolumeManagerError as e:
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                raise

3873 

3874 

3875################################################################################ 

3876# 

3877# Helpers 

3878# 

def daemonize():
    """Double-fork into a daemon process.

    Returns False in the original (parent) process and True in the final,
    fully-detached child. The parent waits on the intermediate child so no
    zombie is left behind; the intermediate child exits via os._exit.
    """
    pid = os.fork()
    if pid:
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    os.chdir("/")
    os.setsid()
    pid = os.fork()
    if pid:
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True

3903 

3904 

def normalizeType(type):
    """Map a concrete SR driver type onto one of the generic SR.TYPES."""
    lvmAliases = ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]
    fileAliases = [
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock"
    ]
    # temporary while LVHD is symlinked as LVM
    if type in LVMSR.SUBTYPES or type in lvmAliases:
        type = SR.TYPE_LVHD
    if type in fileAliases:
        type = SR.TYPE_FILE
    if type in ["linstor"]:
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type

3921 

# Default quiet period (seconds) observed by _gcLoopPause before GC starts.
GCPAUSE_DEFAULT_SLEEP = 5 * 60

3923 

3924 

def _gc_init_file(sr_uuid):
    """Path of the marker file recording that GC has completed a pass."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')

3927 

3928 

def _create_init_file(sr_uuid):
    """Create the marker file signalling that GC has completed a pass.

    The original wrapped the path in a one-argument os.path.join, which is a
    no-op; _gc_init_file already returns the full path.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')

3933 

3934 

def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Observe a quiet period before GC work begins, unless 'immediate'.

    The pause only applies after the first ever GC pass for this SR (i.e.
    once the gc_init marker file exists).
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.

    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):

        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
    elif os.path.exists(_gc_init_file(sr.uuid)):
        def abortTest():
            # Allow the pause to be cut short by the abort IPC flag.
            return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

        # If time.sleep hangs we are in deep trouble, however for
        # completeness we set the timeout of the abort thread to
        # 110% of GCPAUSE_DEFAULT_SLEEP.
        Util.log("GC active, about to go quiet")
        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                          None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                          GCPAUSE_DEFAULT_SLEEP * 1.1)
        Util.log("GC active, quiet period ended")

3958 

3959 

def _gcLoop(sr, dryRun=False, immediate=False):
    """Main GC work loop: collect garbage and coalesce until no work is left.

    Holds lockGCActive for the duration of the run and lockGCRunning around
    each individual pass; reports progress via a XAPI task.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        # NOTE(review): this early return leaves lockGCActive held;
        # presumably process exit releases it — confirm.
        Util.log("SR no longer attached, exiting")
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                # Pass 1: remove orphaned ("garbage") VDIs under the SR lock.
                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # Pass 2: coalesce one non-leaf pair per iteration.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                # Pass 3: fall back to leaf-coalesce.
                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

            finally:
                lockGCRunning.release()
    except:
        task_status = "failure"
        raise
    finally:
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()

4053 

4054 

def _gc(session, srUuid, dryRun=False, immediate=False):
    """Run one full GC session on the given SR, with guaranteed cleanup.

    'immediate' skips the initial quiet period in the loop.
    """
    init(srUuid)
    sr = SR.getInstance(srUuid, session)
    if not sr.gcEnabled(False):
        return

    try:
        _gcLoop(sr, dryRun, immediate=immediate)
    finally:
        sr.check_no_space_candidates()
        sr.cleanup()
        sr.logFilter.logState()
        # Drop the XAPI session reference so its destructor can run.
        del sr.xapi

4068 

4069 

def _abort(srUuid, soft=False):
    """Aborts an GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        # With soft=True, set() fails when an abort flag is already pending;
        # in that case leave the running GC to honour the existing flag.
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        # Wait for the running instance to notice the flag and release.
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True

4098 

4099 

def init(srUuid):
    """Lazily create the module-level GC locks for the given SR.

    lockGCRunning guards an individual GC pass; lockGCActive guards the whole
    GC session (see LockActive).
    """
    global lockGCRunning
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    global lockGCActive
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)

4107 

4108 

class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """

    def __init__(self, srUuid):
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock while briefly holding the SR lock;
        returns True on success, False otherwise."""
        self._srLock.acquire()
        try:
            acquired = self._lock.acquireNoblock()
        finally:
            self._srLock.release()
        return acquired

    def release(self):
        """Release the GC-active lock (the SR lock is never held here)."""
        self._lock.release()

4128 

4129 

def usage():
    """Print CLI usage to stdout, log the invalid invocation, and exit(1)."""
    output = """Garbage collect and/or coalesce COW images in a COW-based SR

Parameters:
    -u --uuid UUID SR UUID
 and one of:
    -g --gc garbage collect, coalesce, and repeat while there is work
    -G --gc_force garbage collect once, aborting any current operations
    -c --cache-clean <max_age> clean up IntelliCache cache files older than
    max_age hours
    -a --abort abort any currently running operation (GC or coalesce)
    -q --query query the current state (GC'ing, coalescing or not running)
    -x --disable disable GC/coalesce (will be in effect until you exit)
    -t --debug see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force continue in the presence of COW images with errors (when doing
    GC, this might cause removal of any such images) (only valid
    for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes. ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
    "deflate".
    -v --vdi_uuid VDI UUID
    """
    #-d --dry-run don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)

4163 

4164 

4165############################################################################## 

4166# 

4167# API 

4168# 

def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
    """
    if not _abort(srUuid, soft):
        return False
    # _abort returned True holding lockGCActive: stop the service and let go.
    stop_gc_service(srUuid)
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True

4179 

4180 

def run_gc(session, srUuid, dryRun, immediate=False):
    """Run GC and translate the outcome into a process exit code.

    Returns 0 on success, 2 when aborted via the abort flag, and 1 on any
    other error (which is also logged).
    """
    try:
        _gc(session, srUuid, dryRun, immediate=immediate)
        return 0
    except AbortException:
        Util.log("Aborted")
        return 2
    except Exception:
        Util.logException("gc")
        Util.log("* * * * * SR %s: ERROR\n" % srUuid)
        return 1

4192 

4193 

def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            run_gc(None, srUuid, dryRun)
            os._exit(0)
    else:
        # Foreground run: exit the process with run_gc's status code.
        os._exit(run_gc(session, srUuid, dryRun, immediate=True))

4223 

4224 

def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if not lockRunning.acquireNoblock():
        if should_preempt(session, sr_uuid):
            util.SMlog("Aborting currently-running coalesce of garbage VDI")
            try:
                if not abort(sr_uuid, soft=True):
                    util.SMlog("The GC has already been scheduled to re-start")
            except util.CommandException as e:
                if e.code != errno.ETIMEDOUT:
                    raise
                util.SMlog('failed to abort the GC')
        else:
            util.SMlog("A GC instance already running, not kicking")
            return
    else:
        lockRunning.release()

    util.SMlog(f"Starting GC file is {__file__}")
    # Re-execute this very script in background mode to do the actual GC.
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4253 

def _gc_service_cmd(sr_uuid, action, extra_args=None):
    """
    Build and run the systemctl command for the GC service using util.doexec.
    """
    # systemd escapes '-' in template instance names as '\x2d'.
    sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
    cmd = ["/usr/bin/systemctl", "--quiet"]
    cmd.extend(extra_args or [])
    cmd.append(action)
    cmd.append(f"SMGC@{sr_uuid_esc}")
    return util.doexec(cmd)

4264 

4265 

def start_gc_service(sr_uuid, wait=False):
    """
    This starts the templated systemd service which runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called with
    wait=True this will run the service synchronously and will not return until the
    run has finished. This is used to force a run of the GC instead of just kicking it
    in the background.
    """
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    # --no-block makes systemctl return without waiting for the unit to finish.
    _gc_service_cmd(sr_uuid, "start", extra_args=None if wait else ["--no-block"])

4278 

4279 

def stop_gc_service(sr_uuid):
    """
    Stops the templated systemd service which runs GC on the given SR UUID.
    """
    util.SMlog(f"Stopping SMGC@{sr_uuid}...")
    rc, _stdout, stderr = _gc_service_cmd(sr_uuid, "stop")
    if rc == 0:
        return
    util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`")

4288 

4289 

def wait_for_completion(sr_uuid):
    """Poll until the GC service for this SR is no longer active."""
    while True:
        if not get_state(sr_uuid):
            break
        time.sleep(5)

4293 

4294 

def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # Another instance holds the GC-active lock: abort it first.
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are COW image errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()

4324 

4325 

def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd for
    the state of the templated SMGC service and will return True if it is "activating"
    or "running" (for completeness, as in practice it will never achieve the latter state)
    """
    # systemd escapes '-' in template instance names as '\x2d'.
    escaped = srUuid.replace("-", "\\x2d")
    proc = subprocess.run(
        ["/usr/bin/systemctl", "is-active", f"SMGC@{escaped}"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = proc.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")

4338 

4339 

def should_preempt(session, srUuid):
    """Return True when the VDI currently being coalesced is itself garbage,
    i.e. a running coalesce should be aborted in favour of GC."""
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if not entries:
        return False
    if len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())

4354 

4355 

def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of vdiUuids that are leaf-coalesceable on this SR.

    Raises SMException if any of the given UUIDs is unknown to the SR.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    coalesceable = []
    for uuid in vdiUuids:
        vdi = sr.getVDI(uuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % uuid)
        if vdi.isLeafCoalesceable():
            coalesceable.append(uuid)
    return coalesceable

4367 

4368 

def cache_cleanup(session, srUuid, maxAge):
    """Remove IntelliCache files older than maxAge hours on the given SR."""
    sr = SR.getInstance(srUuid, session)
    result = sr.cleanupCache(maxAge)
    return result

4372 

4373 

def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug command against a VDI of an LVHD SR.

    cmd is one of "activate", "deactivate", "inflate", "deflate".
    ** NEVER USE IT ON A LIVE VM **
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVMSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the original omitted the format argument, printing the
        # literal "%s" instead of the VDI UUID.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)

4401 

4402 

def abort_optional_reenable(uuid):
    """Disable GC/coalesce until the operator presses enter, then re-enable."""
    print("Disabling GC/coalesce for %s" % uuid)
    ret = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    # NOTE(review): lockGCRunning is released here without a visible matching
    # acquire in _abort — confirm the intended lock ownership.
    lockGCRunning.release()
    if ret:
        lockGCActive.release()

4411 

4412 

4413############################################################################## 

4414# 

4415# CLI 

4416# 

def main():
    """CLI entry point: parse options and dispatch to the requested action."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    for o, a in opts:
        if o in ("-g", "--gc"):
            action = "gc"
        if o in ("-G", "--gc_force"):
            action = "gc_force"
        if o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        if o in ("-a", "--abort"):
            action = "abort"
        if o in ("-q", "--query"):
            action = "query"
        if o in ("-x", "--disable"):
            action = "disable"
        if o in ("-u", "--uuid"):
            uuid = a
        if o in ("-b", "--background"):
            background = True
        if o in ("-f", "--force"):
            force = True
        if o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        if o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        if o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    # An action and an SR UUID are always required; --debug additionally
    # requires both a debug command and a VDI UUID (and forbids them
    # otherwise).
    if not action or not uuid:
        usage()
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action != "query" and action != "debug":
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)

4487 

# Script entry point when invoked directly (e.g. by the SMGC systemd unit).
if __name__ == '__main__':
    main()