Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SR's in the background 

19# 

20 

21from sm_typing import Optional, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import vhdutil 

41import lvhdutil 

42import lvmcache 

43import journaler 

44import fjournaler 

45import lock 

46import blktap2 

47import xs_errors 

48from refcounter import RefCounter 

49from ipc import IPCFlag 

50from lvmanager import LVActivator 

51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

52from functools import reduce 

53from time import monotonic as _time 

54 

55try: 

56 from linstorjournaler import LinstorJournaler 

57 from linstorvhdutil import LinstorVhdUtil 

58 from linstorvolumemanager import get_controller_uri 

59 from linstorvolumemanager import LinstorVolumeManager 

60 from linstorvolumemanager import LinstorVolumeManagerError 

61 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

62 

63 LINSTOR_AVAILABLE = True 

64except ImportError: 

65 LINSTOR_AVAILABLE = False 

66 

# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
# NOTE(review): the comment above describes disabling, yet the flag is True
# (auto online leaf-coalesce enabled) - confirm which is intended.
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# number of samples kept for the running average of coalesce speed
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
# Set to True by receiveSignal() when SIGTERM arrives; polled by long loops.
SIGTERM = False

101 

102 

class AbortException(util.SMException):
    """Raised when a GC/coalesce operation is aborted via flag or signal."""
    pass

105 

106 

def receiveSignal(signalNumber, frame):
    """SIGTERM handler: record that termination was requested.

    Long-running loops poll the module-level SIGTERM flag and exit cleanly.
    """
    global SIGTERM

    # fixed typo in the log message ("recieved" -> "received")
    util.SMlog("GC: received SIGTERM")
    SIGTERM = True
    return

113 

114 

115################################################################################ 

116# 

117# Util 

118# 

class Util:
    """Generic helpers: logging, subprocess execution, abortable child
    processes, and small numeric/bitmap utilities used throughout the GC."""

    # selector values for the 'ret' argument of doexec()
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log *text* with the SMGC identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the exception currently being handled, prefixed with *tag*.

        Exits the process if the pending exception is SystemExit.
        """
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        # "".join is the idiomatic (and linear-time) way to concatenate
        tb = "".join(traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        "Execute a subprocess, then return its return code, stdout, stderr"
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        # NOTE(review): str() on bytes yields "b'...'"-style strings; callers
        # appear to rely only on truthiness/strip(), so this is kept as-is.
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        # accept either a single expected return code or a list of them
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """Execute *func* in a forked child process and kill it if *abortTest*
        (or SIGTERM, or the timeout) signals so.

        The child reports its result through IPCFlag in namespace *ns*:
        "success" if func() == ret, "failure" otherwise.
        """
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for the child's result flag
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log(" Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # reap the child (bounded wait, ~20s) to avoid leaving a zombie
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: become our own process group leader so that killpg()
            # also reaches any grandchildren spawned by func
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Format a byte count with a G/M/K suffix (3 decimal places)."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in the non-negative integer *val*."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        # Normalize so bitmap1 is the longer one. The original version reused
        # the leaked loop variable ("range(i + 1, lenLong)") which raised
        # NameError when the shorter bitmap was empty; fixed here.
        if len(bitmap1) < len(bitmap2):
            bitmap1, bitmap2 = bitmap2, bitmap1
        count = 0
        for i in range(len(bitmap2)):
            count += Util.numBits(bitmap1[i] | bitmap2[i])
        for i in range(len(bitmap2), len(bitmap1)):
            count += Util.numBits(bitmap1[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the absolute path of this script (.py, never .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript

273 

274 

275################################################################################ 

276# 

277# XAPI 

278# 

class XAPI:
    """Wrapper around a XenAPI session providing the operations the GC needs,
    scoped to a single SR."""

    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # identifiers for the XAPI config maps/fields a VDI key may live in
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        """Raised when an object cannot be resolved through XAPI."""
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XAPI session logged in as root."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Wrap *session* (or a new private one if None) for SR *srUuid*."""
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True  # we created it, so we must log it out
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        # counters used to compute the XAPI task progress fraction
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has a currently-attached PBD on the local host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if the local host should run GC for this SR: the pool master
        for shared SRs, the (single) attached host otherwise."""
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Invoke the on-slave "multi" plugin on *hostRef* with *args*."""
        text = self.session.xenapi.host.call_plugin( \
                hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        """Return the XAPI ref for *vdi* (a VDI object)."""
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for *uuid*, or None if it does not exist."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an "internal"-type snapshot of *vdi*."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the config map (or scalar field) that holds *key* for
        *vdi*, per vdi.CONFIG_TYPE."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove *key* from its config map; only the sm-config and
        other-config map kinds are supported."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add *key* = *val* to its config map; only the sm-config and
        other-config map kinds are supported."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Set the dirty flag on all SRs that have local caching enabled."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
                'field "local_cache_enabled" = "true"')
        # get_all_records_where returns a dict keyed by ref, so iterating
        # yields the refs
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Start an asynchronous SR.update and wait up to ~60s for it,
        cancelling the task on abort or timeout."""
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            # always destroy the task handle so it does not leak
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Push current progress/metadata to the associated XAPI task."""
        self.session.xenapi.task.set_other_config(
                self.task,
                {
                    "applies_to": self._srRef
                })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                    self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create a XAPI task through which GC progress is reported."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Update one progress counter and refresh the task, if any."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        """Set the status of the associated XAPI task, if any."""
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)

493 

494 

495################################################################################ 

496# 

497# VDI 

498# 

class VDI(object):
    """Object representing a VDI of a VHD-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202
    DRIVER_NAME_VHD = "vhd"

    # config keys & values
    DB_VHD_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VHD_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each key above to the XAPI config map/field that stores it
    CONFIG_TYPE = {
        DB_VHD_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VHD_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4  # spaces per tree level when printing the VHD tree

553 

    def __init__(self, sr, uuid, raw):
        """Create a VDI record belonging to SR *sr*.

        raw: True if this is a raw (non-VHD) volume.
        """
        self.sr = sr
        self.scanError = True      # cleared once the VDI info is loaded
        self.uuid = uuid
        self.raw = raw
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1         # -1 means "not yet known"
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None         # parent VDI object in the VHD tree
        self.children = []
        self._vdiRef = None
        self._clearRef()

569 

    @staticmethod
    def extractUuid(path):
        """Derive a VDI uuid from its file/volume path (subclass duty)."""
        raise NotImplementedError("Implement in sub class")

573 

    def load(self, info=None) -> None:
        """Load VDI info"""
        # no-op in the base class; subclasses populate size/parent info
        pass

577 

    def getDriverName(self) -> str:
        """Return the tapdisk driver name used to open this VDI."""
        return self.DRIVER_NAME_VHD

580 

581 def getRef(self): 

582 if self._vdiRef is None: 

583 self._vdiRef = self.sr.xapi.getRefVDI(self) 

584 return self._vdiRef 

585 

586 def getConfig(self, key, default=None): 

587 config = self.sr.xapi.getConfigVDI(self, key) 

588 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 588 ↛ 589line 588 didn't jump to line 589, because the condition on line 588 was never true

589 val = config 

590 else: 

591 val = config.get(key) 

592 if val: 

593 return val 

594 return default 

595 

    def setConfig(self, key, val):
        """Set *key* = *val* in this VDI's config (remove-then-add)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

600 

    def delConfig(self, key):
        """Remove *key* from this VDI's config."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

604 

605 def ensureUnpaused(self): 

606 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

607 Util.log("Unpausing VDI %s" % self) 

608 self.unpause() 

609 

    def pause(self, failfast=False) -> None:
        """Pause the tapdisk for this VDI; raise SMException on failure."""
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

614 

    def _report_tapdisk_unpause_error(self):
        """Best-effort: raise a XenCenter message about a failed unpause."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            # message creation must never break the caller's error path
            util.SMlog("failed to generate message: %s" % e)

626 

    def unpause(self):
        """Unpause the tapdisk; report via XenCenter and raise on failure."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

632 

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                        self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # the VDI may have been destroyed under us
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

649 

    def isSnapshot(self):
        """True if XAPI marks this VDI as a snapshot."""
        return self.sr.xapi.isSnapshot(self)

652 

    def isAttachedRW(self):
        """True if this VDI is currently attached read-write somewhere."""
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

656 

    def getVHDBlocks(self):
        """Return the VHD block allocation bitmap as decompressed bytes."""
        # updateBlockInfo() returns the bitmap base64-encoded and zlib-compressed
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap

661 

662 def isCoalesceable(self): 

663 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

664 return not self.scanError and \ 

665 self.parent and \ 

666 len(self.parent.children) == 1 and \ 

667 self.isHidden() and \ 

668 len(self.children) > 0 

669 

670 def isLeafCoalesceable(self): 

671 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

672 return not self.scanError and \ 

673 self.parent and \ 

674 len(self.parent.children) == 1 and \ 

675 not self.isHidden() and \ 

676 len(self.children) == 0 

677 

678 def canLiveCoalesce(self, speed): 

679 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

680 isLeafCoalesceable() already""" 

681 feasibleSize = False 

682 allowedDownTime = \ 

683 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

684 vhd_size = self.getAllocatedSize() 

685 if speed: 

686 feasibleSize = \ 

687 vhd_size // speed < allowedDownTime 

688 else: 

689 feasibleSize = \ 

690 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

691 

692 return (feasibleSize or 

693 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

694 

    def getAllPrunable(self):
        """Return the VDIs in this subtree that may be garbage-collected."""
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all childs are hidden BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436] b5458d61(1.000G/4.127M)
        # SMGC: [32436] *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

730 

    def getSizeVHD(self) -> int:
        """Return the cached physical VHD size in bytes (-1 if unknown)."""
        return self._sizeVHD

733 

    def getAllocatedSize(self) -> int:
        """Return the cached allocated size in bytes (-1 if unknown)."""
        return self._sizeAllocated

736 

737 def getTreeRoot(self): 

738 "Get the root of the tree that self belongs to" 

739 root = self 

740 while root.parent: 

741 root = root.parent 

742 return root 

743 

744 def getTreeHeight(self): 

745 "Get the height of the subtree rooted at self" 

746 if len(self.children) == 0: 

747 return 1 

748 

749 maxChildHeight = 0 

750 for child in self.children: 

751 childHeight = child.getTreeHeight() 

752 if childHeight > maxChildHeight: 

753 maxChildHeight = childHeight 

754 

755 return maxChildHeight + 1 

756 

757 def getAllLeaves(self): 

758 "Get all leaf nodes in the subtree rooted at self" 

759 if len(self.children) == 0: 

760 return [self] 

761 

762 leaves = [] 

763 for child in self.children: 

764 leaves.extend(child.getAllLeaves()) 

765 return leaves 

766 

    def updateBlockInfo(self) -> Optional[str]:
        """Refresh the cached VHD bitmap in this VDI's config; return it
        base64-encoded."""
        val = base64.b64encode(self._queryVHDBlocks()).decode()
        self.setConfig(VDI.DB_VHD_BLOCKS, val)
        return val

771 

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

782 

    def delete(self) -> None:
        "Physically delete the VDI"
        # drop any per-VDI locks before removing the object
        lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

788 

    def getParent(self) -> str:
        """Return the parent recorded in the VHD header (stripped)."""
        return vhdutil.getParent(self.path, lambda x: x.strip())

791 

    def repair(self, parent) -> None:
        """Attempt a vhd-util repair of *parent*."""
        vhdutil.repair(parent)

794 

    @override
    def __str__(self) -> str:
        """Compact summary: [*]uuid8(virtSize/vhdSize/allocSize)[RAW].

        A leading "*" marks a hidden VDI; "?" marks unknown sizes.
        """
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizeVHD = "?"
        if self._sizeVHD > 0:
            strSizeVHD = "/%s" % Util.num2str(self._sizeVHD)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = ""
        if self.raw:
            strType = "[RAW]"
            strSizeVHD = ""  # VHD size is meaningless for raw volumes

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                strSizeVHD, strSizeAllocated, strType)

816 

    def validate(self, fast=False) -> None:
        """Check VHD integrity; raise SMException if the check fails."""
        if not vhdutil.check(self.path, fast=fast):
            raise util.SMException("VHD %s corrupted" % self)

820 

    def _clear(self):
        """Reset identity fields after the backing object has been deleted."""
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

827 

    def _clearRef(self):
        # drop the cached XAPI ref; it will be re-resolved on next getRef()
        self._vdiRef = None

830 

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        VHD, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # grow the parent first so the child's data is guaranteed to fit
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceVHD(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

844 

    def _verifyContents(self, timeOut):
        """Verify the coalesce result with tapdisk-diff (abortable, with a
        timeout); raises on mismatch."""
        Util.log(" Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log(" Coalesce verification succeeded")

851 

    def _runTapdiskDiff(self):
        """Diff this VHD against its parent via tapdisk-diff; True on
        success (Util.doexec raises on a non-zero exit)."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

858 

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        # rate limit configurable per-SR; fall back to the module default
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            # zero disables throttling: always report
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                # remember when we last reported, for the next throttle check
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

912 

    def coalesce(self) -> int:
        """Coalesce this VHD onto its parent; return bytes coalesced."""
        # size is returned in sectors
        return vhdutil.coalesce(self.path) * 512

916 

917 @staticmethod 

918 def _doCoalesceVHD(vdi): 

919 try: 

920 startTime = time.time() 

921 vhdSize = vdi.getAllocatedSize() 

922 coalesced_size = vdi.coalesce() 

923 endTime = time.time() 

924 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

925 except util.CommandException as ce: 

926 # We use try/except for the following piece of code because it runs 

927 # in a separate process context and errors will not be caught and 

928 # reported by anyone. 

929 try: 

930 # Report coalesce errors back to user via XC 

931 VDI._reportCoalesceError(vdi, ce) 

932 except Exception as e: 

933 util.SMlog('failed to create XenCenter message: %s' % e) 

934 raise ce 

935 except: 

936 raise 

937 

    def _vdi_is_raw(self, vdi_path):
        """
        Given path to vdi determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].raw

944 

def _coalesceVHD(self, timeOut):
    """Run the VHD coalesce of 'self' as an abortable operation.

    timeOut: maximum time (seconds) runAbortable waits before killing the
    coalesce. On any failure — including a timeout kill — an offline
    repair of the parent VHD is attempted (errors ignored) before the
    original exception is re-raised.
    """
    Util.log(" Running VHD coalesce on %s" % self)
    # Abort promptly if the SR-wide abort flag is raised by another process.
    abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
    try:
        # Test hook: optionally inject a failure before the coalesce starts.
        util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
        Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
    except:
        #exception at this phase could indicate a failure in vhd coalesce
        # or a kill of vhd coalesce by runAbortable due to timeOut
        # Try a repair and reraise the exception
        parent = ""
        try:
            parent = self.getParent()
            # Raw parents have no VHD metadata to repair.
            if not self._vdi_is_raw(parent):
                # Repair error is logged and ignored. Error reraised later
                util.SMlog('Coalesce failed on %s, attempting repair on ' \
                        'parent %s' % (self.uuid, parent))
                self.repair(parent)
        except Exception as e:
            util.SMlog('(error ignored) Failed to repair parent %s ' \
                    'after failed coalesce on %s, err: %s' %
                    (parent, self.path, e))
        raise

    util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

973 

def _relinkSkip(self) -> None:
    """Relink children of this VDI to point to the parent of this VDI"""
    abort_flag = IPCFlag(self.sr.uuid)
    new_parent = self.parent
    for child in self.children:
        # Bail out as soon as another process requests an abort.
        if abort_flag.test(FLAG_TYPE_ABORT):
            raise AbortException("Aborting due to signal")
        Util.log(" Relinking %s from %s to %s" % \
                (child, self, new_parent))
        util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
        child._setParent(new_parent)
    self.children = []

985 

def _reloadChildren(self, vdiSkip):
    """Pause & unpause all VDIs in the subtree to cause blktap to reload
    the VHD metadata for this file in any online VDI"""
    abort_flag = IPCFlag(self.sr.uuid)
    for child in self.children:
        # The caller may exempt one child (e.g. the one just coalesced).
        if child == vdiSkip:
            continue
        if abort_flag.test(FLAG_TYPE_ABORT):
            raise AbortException("Aborting due to signal")
        Util.log(" Reloading VDI %s" % child)
        child._reload()

997 

def _reload(self):
    """Pause & unpause to cause blktap to reload the VHD metadata"""
    # Depth-first: reload the whole subtree below us first.
    for node in self.children:
        node._reload()

    if self.children:
        # Only leaves can be attached, so there is nothing to refresh here.
        return
    try:
        self.delConfig(VDI.DB_VDI_RELINKING)
    except XenAPI.Failure as e:
        # The VDI may have disappeared meanwhile; that is not an error.
        if not util.isInvalidVDI(e):
            raise
    self.refresh()

1011 

def _tagChildrenForRelink(self):
    """Mark every leaf VDI in this subtree with DB_VDI_RELINKING.

    Tagging races with VDI activation: a leaf that is activating is
    waited on (up to 15 attempts, 2s apart), and a tag set just as
    activation starts is rolled back and retried. Raises SMException if
    a leaf cannot be tagged within the retry budget; an "invalid VDI"
    XenAPI failure is ignored (the VDI was deleted under our feet).
    """
    if len(self.children) == 0:
        retries = 0
        try:
            while retries < 15:
                retries += 1
                if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                    Util.log("VDI %s is activating, wait to relink" %
                            self.uuid)
                else:
                    self.setConfig(VDI.DB_VDI_RELINKING, "True")

                    # Re-check: activation may have started between the
                    # check above and setting the tag.
                    if self.getConfig(VDI.DB_VDI_ACTIVATING):
                        self.delConfig(VDI.DB_VDI_RELINKING)
                        Util.log("VDI %s started activating while tagging" %
                                self.uuid)
                    else:
                        return
                time.sleep(2)

            raise util.SMException("Failed to tag vdi %s for relink" % self)
        except XenAPI.Failure as e:
            if not util.isInvalidVDI(e):
                raise

    for child in self.children:
        child._tagChildrenForRelink()

1039 

def _loadInfoParent(self):
    """Refresh parentUuid from the on-disk VHD header (if one is set)."""
    parent_uuid = vhdutil.getParent(self.path, lvhdutil.extractUuid)
    if parent_uuid:
        self.parentUuid = parent_uuid

1044 

def _setParent(self, parent) -> None:
    """Repoint this VHD at 'parent' and record the link in memory and DB."""
    vhdutil.setParent(self.path, parent.path, False)
    new_parent_uuid = parent.uuid
    self.parent = parent
    self.parentUuid = new_parent_uuid
    parent.children.append(self)
    try:
        self.setConfig(self.DB_VHD_PARENT, new_parent_uuid)
        Util.log("Updated the vhd-parent field for child %s with %s" % \
                (self.uuid, self.parentUuid))
    except:
        # Best-effort only: the DB field is advisory, so keep going.
        Util.log("Failed to update %s with vhd-parent field %s" % \
                (self.uuid, self.parentUuid))

1057 

def isHidden(self) -> bool:
    """Return whether this VDI is marked hidden (loaded lazily on first use)."""
    cached = self._hidden
    if cached is not None:
        return cached
    self._loadInfoHidden()
    return self._hidden

1062 

def _loadInfoHidden(self) -> None:
    """Read the hidden flag from the VHD header and cache it as a bool."""
    self._hidden = vhdutil.getHidden(self.path) != 0

1066 

def _setHidden(self, hidden=True) -> None:
    """Persist the hidden flag into the VHD header, then update the cache."""
    # Invalidate first so a failure in setHidden leaves the cache unset.
    self._hidden = None
    vhdutil.setHidden(self.path, hidden)
    self._hidden = hidden

1071 

def _increaseSizeVirt(self, size, atomic=True) -> None:
    """ensure the virtual size of 'self' is at least 'size'. Note that
    resizing a VHD must always be offline and atomically: the file must
    not be open by anyone and no concurrent operations may take place.
    Thus we use the Agent API call for performing paused atomic
    operations. If the caller is already in the atomic context, it must
    call with atomic = False"""
    if self.sizeVirt >= size:
        return
    Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
            (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

    # getMaxResizeSize returns MiB: up to this size the VHD can be grown
    # in place without rewriting its metadata.
    msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024
    if (size <= msize):
        # Fast path: in-place grow, no pausing required.
        vhdutil.setSizeVirtFast(self.path, size)
    else:
        if atomic:
            # Pause the entire subtree so that no VHD in it can be
            # written to while the offline-only resize runs.
            vdiList = self._getAllSubtree()
            self.sr.lock()
            try:
                self.sr.pauseVDIs(vdiList)
                try:
                    self._setSizeVirt(size)
                finally:
                    self.sr.unpauseVDIs(vdiList)
            finally:
                self.sr.unlock()
        else:
            self._setSizeVirt(size)

    # Re-read the authoritative value from the VHD header.
    self.sizeVirt = vhdutil.getSizeVirt(self.path)

1103 

def _setSizeVirt(self, size) -> None:
    """WARNING: do not call this method directly unless all VDIs in the
    subtree are guaranteed to be unplugged (and remain so for the duration
    of the operation): this operation is only safe for offline VHDs"""
    # The resize journal lives next to the VDI, keyed by its UUID.
    journal_path = os.path.join(self.sr.path, self.uuid)
    vhdutil.setSizeVirt(self.path, size, journal_path)

1110 

def _queryVHDBlocks(self) -> bytes:
    """Return the raw block-allocation bitmap of this VHD."""
    return vhdutil.getBlockBitmap(self.path)

1113 

def _getCoalescedSizeData(self):
    """Get the data size of the resulting VHD if we coalesce self onto
    parent. We calculate the actual size by using the VHD block allocation
    information (as opposed to just adding up the two VHD sizes to get an
    upper bound)"""
    # make sure we don't use stale BAT info from vdi_rec since the child
    # was writable all this time
    self.delConfig(VDI.DB_VHD_BLOCKS)
    child_bitmap = self.getVHDBlocks()
    parent_bitmap = self.parent.getVHDBlocks()
    combined_blocks = Util.countBits(child_bitmap, parent_bitmap)
    Util.log("Num combined blocks = %d" % combined_blocks)
    size_data = combined_blocks * vhdutil.VHD_BLOCK_SIZE
    assert(size_data <= self.sizeVirt)
    return size_data

1129 

def _calcExtraSpaceForCoalescing(self) -> int:
    """Extra SR space required to coalesce self onto its parent."""
    data_size = self._getCoalescedSizeData()
    overhead = vhdutil.calcOverheadBitmap(data_size) + \
            vhdutil.calcOverheadEmpty(self.sizeVirt)
    coalesced_size = data_size + overhead
    Util.log("Coalesced size = %s" % Util.num2str(coalesced_size))
    return coalesced_size - self.parent.getSizeVHD()

1136 

1137 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1138 """How much extra space in the SR will be required to 

1139 [live-]leaf-coalesce this VDI""" 

1140 # the space requirements are the same as for inline coalesce 

1141 return self._calcExtraSpaceForCoalescing() 

1142 

def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
    """How much extra space in the SR will be required to
    snapshot-coalesce this VDI"""
    # On top of the coalesce cost we need room for the new empty snap leaf.
    snap_leaf_overhead = vhdutil.calcOverheadEmpty(self.sizeVirt)
    return self._calcExtraSpaceForCoalescing() + snap_leaf_overhead

1148 

1149 def _getAllSubtree(self): 

1150 """Get self and all VDIs in the subtree of self as a flat list""" 

1151 vdiList = [self] 

1152 for child in self.children: 

1153 vdiList.extend(child._getAllSubtree()) 

1154 return vdiList 

1155 

1156 

class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS).

    The VDI is backed by a plain file named <uuid>.vhd (or <uuid>.raw for
    raw VDIs) directly under the SR mount point.
    """

    @override
    @staticmethod
    def extractUuid(path):
        # Derive the VDI UUID from a file path; returns None for paths
        # that are neither .vhd nor .raw files.
        path = os.path.basename(path.strip())
        if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \
                path.endswith(vhdutil.FILE_EXTN_RAW)):
            return None
        # NOTE(review): str.replace strips the extension anywhere in the
        # name, not just at the end — fine for UUID-shaped names.
        uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \
                vhdutil.FILE_EXTN_RAW, "")
        # TODO: validate UUID format
        return uuid

    def __init__(self, sr, uuid, raw):
        VDI.__init__(self, sr, uuid, raw)
        # File name carries the type: <uuid>.raw or <uuid>.vhd.
        if self.raw:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW)
        else:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)

    @override
    def load(self, info=None) -> None:
        """Initialize fields from 'info', reading the VHD header on disk
        when no info is supplied. A metadata read failure is logged and
        leaves the VDI only partially initialized (scanError stays set)."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = vhdutil.getVHDInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        # NOTE(review): path is rebuilt with the .vhd extension even for a
        # raw VDI — presumably load() is only reached for VHD files; confirm.
        self.path = os.path.join(self.sr.path, "%s%s" % \
                (self.uuid, vhdutil.FILE_EXTN_VHD))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI (and its backing file) to the new UUID."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
        self.path = os.path.join(self.sr.path, self.fileName)
        # The destination must not exist, otherwise we would clobber a VDI.
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI, under the SR lock."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        # Lazily computed; -1 means "not fetched yet".
        if self._sizeAllocated == -1:
            self._sizeAllocated = vhdutil.getAllocatedSize(self.path)
        return self._sizeAllocated

1231 

1232 

class LVHDVDI(VDI):
    """Object representing a VDI in an LVHD SR.

    The VDI is a VHD (or raw image) stored inside a logical volume. LVs
    are kept deflated (as small as the VHD currently needs) and must be
    activated and inflated before coalescing, then deflated again.
    """

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent
    DRIVER_NAME_RAW = "aio"

    @override
    def load(self, info=None) -> None:
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVHDVDI.load"
        self.parent = None
        self.children = []
        self._sizeVHD = -1          # lazily loaded, see _loadInfoSizeVHD
        self._sizeAllocated = -1    # lazily loaded, see _loadInfoSizeAllocated
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)

    @override
    @staticmethod
    def extractUuid(path):
        # Delegates to lvhdutil, which knows the LV naming scheme.
        return lvhdutil.extractUuid(path)

    @override
    def getDriverName(self) -> str:
        # Raw LVs are attached via "aio"; VHDs via the inherited VHD driver.
        if self.raw:
            return self.DRIVER_NAME_RAW
        return self.DRIVER_NAME_VHD

    def inflate(self, size):
        """inflate the LV containing the VHD to 'size'"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # LV size changed: refresh it and invalidate the cached VHD sizes.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the VHD to minimum"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD())
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        # Grow the LV to the maximum the VHD could ever need.
        self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        # Raw VDIs have no VHD block bitmap to record.
        if not self.raw:
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI, its LV and its refcounter entries to 'uuid'."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid
        if self.raw:
            self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        # Keep the LV activator's bookkeeping in sync with the new name.
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # Move the refcounts over to the new UUID.
        ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV, forget the VDI and clear its refcounts."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizeVHD(self) -> int:
        if self._sizeVHD == -1:
            self._loadInfoSizeVHD()
        return self._sizeVHD

    def _loadInfoSizeVHD(self):
        """Get the physical utilization of the VHD file. We do it individually
        (and not using the VHD batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if self.raw:
            return
        self._activate()
        self._sizeVHD = vhdutil.getSizePhys(self.path)
        if self._sizeVHD <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizeVHD))

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the VHD volume.
        """
        if self.raw:
            return
        self._activate()
        self._sizeAllocated = vhdutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        # For raw LVs the hidden flag lives in LVM metadata, not a VHD header.
        if self.raw:
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        if self.raw:
            # Invalidate first so a failure leaves the cache unset.
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        # Compact one-line summary: *uuid[TYPE](virt/phys/alloc/LV|activity)
        strType = "VHD"
        if self.raw:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVHD = ""
        if self._sizeVHD > 0:
            strSizeVHD = Util.num2str(self._sizeVHD)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        # Raw images carry no VHD structures to validate.
        if not self.raw:
            VDI.validate(self, fast)

    @override
    def _doCoalesce(self) -> None:
        """LVHD parents must first be activated, inflated, and made writable"""
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # Always shrink the parent back and restore read-only, even on
            # failure part-way through.
            self.parent._loadInfoSizeVHD()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)

    @override
    def _setParent(self, parent) -> None:
        """Repoint this VHD at 'parent'; the LV must be made writable first."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            vhdutil.setParent(self.path, parent.path, parent.raw)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Best-effort: the DB field is advisory only.
            Util.log("Failed to update the vhd-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        # Activate the LV via the SR's activator (refcounted).
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        # Activate this VDI and every ancestor up to the root.
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if not self.raw:
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case: grow the LV and zero out the new tail. The zeroing
        # is journaled so an interrupted run can be redone.
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            # A previous zeroing was interrupted: resume from its offset.
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline VHDs"""
        self._activate()
        # The resize journal is kept in its own LV so it survives crashes.
        jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid,
                vhdutil.MAX_VHD_JOURNAL_SIZE)
        try:
            lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid,
                    size, jFile)
        finally:
            lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)

    @override
    def _queryVHDBlocks(self) -> bytes:
        # The LV must be active before the VHD file can be read.
        self._activate()
        return VDI._queryVHDBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if self.parent.raw:
            return 0  # raw parents are never deflated in the first place
        sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        # Coalesce cost plus an LV for the new snapshot leaf.
        return self._calcExtraSpaceForCoalescing() + \
                lvhdutil.calcSizeLV(self.getSizeVHD())

1554 

1555 

class LinstorVDI(VDI):
    """Object representing a VDI in a LINSTOR SR.

    The VDI is a VHD stored in a DRBD-replicated LINSTOR volume; volume
    operations go through self.sr._linstor and VHD operations through
    self.sr._vhdutil (LinstorVhdUtil).
    """

    # Max seconds to wait for a volume lock before pausing/relinking.
    VOLUME_LOCK_TIMEOUT = 30

    @override
    def load(self, info=None) -> None:
        # NOTE(review): info.parentUuid is read before the `if not info`
        # fallback, so a None info would raise here — presumably info is
        # always given by the scanner; confirm against callers.
        self.parentUuid = info.parentUuid
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)

        if not info:
            try:
                info = self.sr._vhdutil.get_vhd_info(self.uuid)
            except util.SMException:
                Util.log(
                    ' [VDI {}: failed to read VHD metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = -1          # lazily fetched
        self._sizeAllocated = -1    # lazily fetched
        self.drbd_size = -1         # lazily fetched DRBD volume size
        self._hidden = info.hidden
        self.scanError = False
        self.vdi_type = vhdutil.VDI_TYPE_VHD

    @override
    def getSizeVHD(self, fetch=False) -> int:
        # fetch=True forces a re-read, bypassing the cached value.
        if self._sizeVHD < 0 or fetch:
            self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid)
        return self._sizeVHD

    def getDrbdSize(self, fetch=False):
        # Size of the underlying DRBD volume (cached unless fetch=True).
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid)
        return self.drbd_size

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            if not self.raw:
                self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

    def inflate(self, size):
        """Grow the DRBD volume holding the VHD to at least 'size'."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # Invalidate all cached sizes after the resize.
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """Shrink the DRBD volume back to what the VHD actually uses."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizeVHD = self.getSizeVHD(fetch=True)
            self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        # Grow to the maximum size the VHD could ever need.
        if not self.raw:
            self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type))

    @override
    def rename(self, uuid) -> None:
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI, under the SR lock."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

    @override
    def validate(self, fast=False) -> None:
        if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast):
            raise util.SMException('VHD {} corrupted'.format(self))

    @override
    def pause(self, failfast=False) -> None:
        # Make sure no other LINSTOR operation holds the volume lock first.
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

    @override
    def coalesce(self) -> int:
        # Note: We raise `SMException` here to skip the current coalesce in case of failure.
        # Using another exception we can't execute the next coalesce calls.
        return self.sr._vhdutil.force_coalesce(self.path) * 512

    @override
    def getParent(self) -> str:
        return self.sr._vhdutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

    @override
    def repair(self, parent_uuid) -> None:
        self.sr._vhdutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

    @override
    def _relinkSkip(self) -> None:
        """Relink children to this VDI's parent, pausing each tapdisk."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                # Pause the child's tapdisk while rewriting its parent link.
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

    @override
    def _setParent(self, parent) -> None:
        # Path lookup first: it makes sure the DRBD device is reachable here.
        self.sr._linstor.get_device_path(self.uuid)
        self.sr._vhdutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Best-effort: the DB field is advisory only.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

    @override
    def _doCoalesce(self) -> None:
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # Always shrink the parent back, even if the coalesce failed.
            self.parent.deflate()

    def _activateChain(self):
        # Walk up to the root; fetching the device path of each volume
        # ensures it is accessible on this host (result intentionally unused).
        vdi = self
        while vdi:
            try:
                p = self.sr._linstor.get_device_path(vdi.uuid)
            except Exception as e:
                # Use SMException to skip coalesce.
                # Otherwise the GC is stopped...
                raise util.SMException(str(e))
            vdi = vdi.parent

    @override
    def _setHidden(self, hidden=True) -> None:
        HIDDEN_TAG = 'hidden'

        if self.raw:
            # Raw volumes keep the flag in LINSTOR metadata, not a VHD header.
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        """Ensure the virtual size of 'self' is at least 'size'."""
        if self.raw:
            # Raw case: resize the DRBD volume and zero the new tail,
            # journaling the zeroing so an interrupted run can be redone.
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size returns MiB; fast resize works up to it.
        msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            self.sr._vhdutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                # Pause the whole subtree during the offline-only resize.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the authoritative value from the VHD.
        self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        # A transient (non-persistent) journal volume protects the resize.
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile
        )
        try:
            self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type))
            self.sr._vhdutil.set_size_virt(size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

    @override
    def _queryVHDBlocks(self) -> bytes:
        return self.sr._vhdutil.get_block_bitmap(self.uuid)

    def _inflateParentForCoalesce(self):
        # Inflate the parent only as much as the coalesce actually needs.
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            self.parent.inflate(self.parent.getDrbdSize() + inc)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if self.parent.raw:
            return 0
        size_coalesced = LinstorVhdUtil.compute_volume_size(
            self._getCoalescedSizeData(), self.vdi_type
        )
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        assert self.getDrbdSize() > 0
        assert self.getSizeVHD() > 0
        # The leaf can be deflated first, reducing the requirement.
        deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
        assert deflate_diff >= 0
        return self._calcExtraSpaceForCoalescing() - deflate_diff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        assert self.getSizeVHD() > 0
        # Coalesce cost plus a volume for the new snapshot leaf.
        return self._calcExtraSpaceForCoalescing() + \
                LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())

1872 

1873################################################################################ 

1874# 

1875# SR 

1876# 

1877class SR(object): 

    class LogFilter:
        """Logs the SR's VHD-tree state, suppressing unchanged trees on
        repeat calls so later dumps only show what changed."""

        def __init__(self, sr):
            self.sr = sr
            self.stateLogged = False  # becomes True after the first logState()
            self.prevState = {}       # tree-root uuid -> rendered tree string
            self.currState = {}

        def logState(self):
            """Log the current VHD trees; after the first call, only trees
            whose rendering changed (or that disappeared) are shown."""
            changes = ""
            self.currState.clear()
            for vdi in self.sr.vdiTrees:
                self.currState[vdi.uuid] = self._getTreeStr(vdi)
                if not self.prevState.get(vdi.uuid) or \
                        self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
                    changes += self.currState[vdi.uuid]

            # Report trees that existed last time but are now gone.
            for uuid in self.prevState:
                if not self.currState.get(uuid):
                    changes += "Tree %s gone\n" % uuid

            result = "SR %s (%d VDIs in %d VHD trees): " % \
                    (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))

            if len(changes) > 0:
                if self.stateLogged:
                    result += "showing only VHD trees that changed:"
                result += "\n%s" % changes
            else:
                result += "no changes"

            for line in result.split("\n"):
                Util.log("%s" % line)
            # Current state becomes the baseline for the next call.
            self.prevState.clear()
            for key, val in self.currState.items():
                self.prevState[key] = val
            self.stateLogged = True

        def logNewVDI(self, uuid):
            # Only interesting after the initial full state dump.
            if self.stateLogged:
                Util.log("Found new VDI when scanning: %s" % uuid)

        def _getTreeStr(self, vdi, indent=8):
            """Render vdi and its descendants as an indented tree string."""
            treeStr = "%s%s\n" % (" " * indent, vdi)
            for child in vdi.children:
                treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
            return treeStr

1924 

    # Recognized SR implementation types (see getInstance()).
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock acquisition retry parameters (interval in seconds, used with
    # time.sleep() in lock()).
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    # Prefix used to rename the obsolete leaf during leaf-coalesce.
    TMP_RENAME_PREFIX = "OLD_"

    # other_config keys used by the offline leaf-coalesce machinery.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"

1941 

1942 @staticmethod 

1943 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1944 xapi = XAPI(xapiSession, uuid) 

1945 type = normalizeType(xapi.srRecord["type"]) 

1946 if type == SR.TYPE_FILE: 

1947 return FileSR(uuid, xapi, createLock, force) 

1948 elif type == SR.TYPE_LVHD: 

1949 return LVHDSR(uuid, xapi, createLock, force) 

1950 elif type == SR.TYPE_LINSTOR: 

1951 return LinstorSR(uuid, xapi, createLock, force) 

1952 raise util.SMException("SR type %s not recognized" % type) 

1953 

    def __init__(self, uuid, xapi, createLock, force):
        """Base SR state. Raises util.SMException if the SR is not attached
        here (unless force) or this host is not the pool master (unless
        force)."""
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}      # uuid -> VDI object, populated by scan()
        self.vdiTrees = []  # root VDIs of the VHD trees
        self.journaler = None
        self.xapi = xapi
        self._locked = 0    # lock() nesting depth
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        # VDIs that failed to coalesce this run; skipped by the finders.
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # give the SR a short grace period to become plugged
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        # uuid -> VDI that could not be coalesced due to lack of space.
        self.no_space_candidates = {}

1985 

1986 def msg_cleared(self, xapi_session, msg_ref): 

1987 try: 

1988 msg = xapi_session.xenapi.message.get_record(msg_ref) 

1989 except XenAPI.Failure: 

1990 return True 

1991 

1992 return msg is None 

1993 

    def check_no_space_candidates(self):
        """Raise (or refresh) a XAPI 'SM_GC_NO_SPACE' message if any VDI could
        not be coalesced for lack of space; destroy the message otherwise."""
        xapi_session = self.xapi.getSession()

        # The last message id we raised is stashed in the SR's sm_config.
        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # Only (re)create a message if none exists or it was dismissed.
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace the stored message id with the fresh one.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            # Tag each starving VDI with the message id as well.
            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.message.destroy(msg_id)

2020 

2021 def clear_no_space_msg(self, vdi): 

2022 msg_id = None 

2023 try: 

2024 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2025 except XenAPI.Failure: 

2026 pass 

2027 

2028 self.no_space_candidates.pop(vdi.uuid, None) 

2029 if msg_id is not None: 2029 ↛ exitline 2029 didn't return from function 'clear_no_space_msg', because the condition on line 2029 was never false

2030 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2031 

2032 

2033 def wait_for_plug(self): 

2034 for _ in range(1, 10): 

2035 time.sleep(2) 

2036 if self.xapi.isPluggedHere(): 

2037 return True 

2038 return False 

2039 

    def gcEnabled(self, refresh=True):
        """Return False (and log) if GC was disabled via the SR's
        other_config; optionally refresh the cached SR record first."""
        if refresh:
            self.xapi.srRecord = \
                    self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
        if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
            Util.log("GC is disabled for this SR, abort")
            return False
        return True

2048 

    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist"""
        # No-op in the base class; SR subclasses provide the real scan.
        pass

2053 

    def scanLocked(self, force=False):
        """Run scan() while holding the SR lock."""
        self.lock()
        try:
            self.scan(force)
        finally:
            self.unlock()

2060 

    def getVDI(self, uuid):
        """Return the VDI object for uuid, or None if not known."""
        return self.vdis.get(uuid)

2063 

2064 def hasWork(self): 

2065 if len(self.findGarbage()) > 0: 

2066 return True 

2067 if self.findCoalesceable(): 

2068 return True 

2069 if self.findLeafCoalesceable(): 

2070 return True 

2071 if self.needUpdateBlockInfo(): 

2072 return True 

2073 return False 

2074 

    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            # NOTE: returns the (empty, falsy) candidate list, not None
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        for h in heights:
            for c in treeHeight[h]:
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    self.clear_no_space_msg(c)
                    return c
                else:
                    # remember it so check_no_space_candidates() can alert
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                            (c, freeSpace))
        return None

2125 

    def getSwitch(self, key):
        """Return the raw other_config value for key (None if unset)."""
        return self.xapi.srRecord["other_config"].get(key)

2128 

2129 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2130 srSwitch = self.getSwitch(switch) 

2131 ret = False 

2132 if srSwitch: 

2133 ret = srSwitch == condition 

2134 

2135 if ret: 

2136 Util.log(fail_msg) 

2137 

2138 return ret 

2139 

2140 def leafCoalesceForbidden(self): 

2141 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2142 "false", 

2143 "Coalesce disabled for this SR") or 

2144 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2145 VDI.LEAFCLSC_DISABLED, 

2146 "Leaf-coalesce disabled for this SR")) 

2147 

    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each VHD tree"""

        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # snapshot-coalesce doesn't fit; consider the cheaper live
                # leaf-coalesce if this candidate qualifies for it
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    Util.log("...but enough space if skip snap-coalesce")
                    # would fit without the snapshot step: request offline
                    # leaf-coalesce for this VDI
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None

2184 

    def gatherLeafCoalesceable(self, candidates):
        """Append to 'candidates' every leaf VDI eligible for leaf-coalesce."""
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Only proceed if online leaf-coalesce is globally enabled or
            # explicitly forced on this VDI.
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)

2204 

    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent"""
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            if isinstance(e, AbortException):
                # aborts must propagate to the caller after cleanup
                self.cleanup()
                raise
            else:
                # any other failure: blacklist this VDI for the rest of this
                # run and carry on
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        self.cleanup()

2222 

    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent"""
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # re-fetch by uuid: the object may have been replaced
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            # blacklist this VDI for the rest of this run and carry on
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
            self.cleanup()

2246 

2247 def garbageCollect(self, dryRun=False): 

2248 vdiList = self.findGarbage() 

2249 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2250 for vdi in vdiList: 

2251 Util.log(" %s" % vdi) 

2252 if not dryRun: 

2253 self.deleteVDIs(vdiList) 

2254 self.cleanupJournals(dryRun) 

2255 

2256 def findGarbage(self): 

2257 vdiList = [] 

2258 for vdi in self.vdiTrees: 

2259 vdiList.extend(vdi.getAllPrunable()) 

2260 return vdiList 

2261 

    def deleteVDIs(self, vdiList) -> None:
        """Delete each VDI in vdiList, honoring an abort request between
        deletions."""
        for vdi in vdiList:
            if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Deleting unlinked VDI %s" % vdi)
            self.deleteVDI(vdi)

2268 

    def deleteVDI(self, vdi) -> None:
        """Remove a childless VDI from the in-memory tree and destroy it."""
        assert(len(vdi.children) == 0)
        del self.vdis[vdi.uuid]
        if vdi.parent:
            vdi.parent.children.remove(vdi)
        if vdi in self.vdiTrees:
            # it was a tree root
            self.vdiTrees.remove(vdi)
        vdi.delete()

2277 

    def forgetVDI(self, vdiUuid) -> None:
        """Remove the VDI record from the XAPI database."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)

2280 

2281 def pauseVDIs(self, vdiList) -> None: 

2282 paused = [] 

2283 failed = False 

2284 for vdi in vdiList: 

2285 try: 

2286 vdi.pause() 

2287 paused.append(vdi) 

2288 except: 

2289 Util.logException("pauseVDIs") 

2290 failed = True 

2291 break 

2292 

2293 if failed: 

2294 self.unpauseVDIs(paused) 

2295 raise util.SMException("Failed to pause VDIs") 

2296 

2297 def unpauseVDIs(self, vdiList): 

2298 failed = False 

2299 for vdi in vdiList: 

2300 try: 

2301 vdi.unpause() 

2302 except: 

2303 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2304 failed = True 

2305 if failed: 

2306 raise util.SMException("Failed to unpause VDIs") 

2307 

    def getFreeSpace(self) -> int:
        # Base class reports zero free space; SR subclasses override this.
        return 0

2310 

    def cleanup(self):
        """Release per-run resources; nothing to do in the base class."""
        Util.log("In cleanup")
        return

2314 

2315 @override 

2316 def __str__(self) -> str: 

2317 if self.name: 

2318 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2319 else: 

2320 ret = "%s" % self.uuid 

2321 return ret 

2322 

    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # SR locking disabled for this run
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # retry with a sleep between attempts, bailing out if an abort
            # has been requested meanwhile
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # already held by us: just bump the nesting count
        self._locked += 1

2342 

    def unlock(self):
        """Release one level of the SR lock; no-op when locking is disabled."""
        if not self._srLock:
            return
        assert(self._locked > 0)
        self._locked -= 1
        if self._locked == 0:
            # outermost unlock: actually release the lock
            self._srLock.release()

2350 

2351 def needUpdateBlockInfo(self) -> bool: 

2352 for vdi in self.vdis.values(): 

2353 if vdi.scanError or len(vdi.children) == 0: 

2354 continue 

2355 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2356 return True 

2357 return False 

2358 

2359 def updateBlockInfo(self) -> None: 

2360 for vdi in self.vdis.values(): 

2361 if vdi.scanError or len(vdi.children) == 0: 

2362 continue 

2363 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2364 vdi.updateBlockInfo() 

2365 

2366 def cleanupCoalesceJournals(self): 

2367 """Remove stale coalesce VDI indicators""" 

2368 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2369 for uuid, jval in entries.items(): 

2370 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2371 

    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    continue
                if t == SR.JRN_CLONE:
                    # a clone journal is still live if its base VDI exists
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log(" Deleting stale '%s' journal entry for %s "
                        "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)

2387 

    def cleanupCache(self, maxAge=-1) -> int:
        # No cache at this level; subclasses with a local cache override this
        # and return the number of entries removed.
        return 0

2390 

    def _coalesce(self, vdi):
        """Coalesce vdi onto its parent, then relink vdi's children to the
        parent; each phase is journaled so an interrupted run can resume."""
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the VHD coalesce step is done, because we
            # don't expect the rest of the process to take long
            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            vdi._doCoalesce()
            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)

            util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

            # we now need to relink the children: lock the SR to prevent ops
            # like SM.clone from manipulating the VDIs we'll be relinking and
            # rescan the SR first in case the children changed since the last
            # scan
            self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip()
        finally:
            self.unlock()
            # Reload the children to leave things consistent
            vdi.parent._reloadChildren(vdi)

        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
        # vdi's data now lives in the parent; the node itself is garbage
        self.deleteVDI(vdi)

2425 

    class CoalesceTracker:
        """Tracks VHD size across repeated snapshot-coalesce iterations and
        decides when to give up because no progress is being made."""

        GRACE_ITERATIONS = 2            # tolerated size bumps before aborting
        MAX_ITERATIONS_NO_PROGRESS = 3
        MAX_ITERATIONS = 20
        MAX_INCREASE_FROM_MINIMUM = 1.2  # allowed growth over the best size
        HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
                " --> Final size {finSize}"

        def __init__(self, sr):
            self.itsNoProgress = 0
            self.its = 0
            self.minSize = float("inf")
            self._history = []  # one {'finalsize', 'msg'} dict per iteration
            self.reason = ""
            self.startSize = None
            self.finishSize = None
            self.sr = sr
            self.grace_remaining = self.GRACE_ITERATIONS

        @property
        def history(self):
            # Human-readable per-iteration messages only.
            return [x['msg'] for x in self._history]

        def moving_average(self):
            """
            Average of the final sizes of the recorded iterations.
            NOTE(review): despite the original "three point moving average"
            description, this averages the ENTIRE history; only a minimum of
            three entries is asserted.
            """
            assert len(self._history) >= 3

            mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history)
            util.SMlog(f'Calculated moving average as {mv_average}')
            return mv_average

        def abortCoalesce(self, prevSize, curSize):
            """Record one iteration (prevSize -> curSize) and return True if
            the coalesce loop should be aborted."""
            self.its += 1
            self._history.append(
                {
                    'finalsize': curSize,
                    'msg': self.HISTORY_STRING.format(its=self.its,
                                                      initSize=prevSize,
                                                      finSize=curSize)
                }
            )

            self.finishSize = curSize

            if self.startSize is None:
                self.startSize = prevSize

            # Track the smallest size seen on either side of an iteration.
            if curSize < self.minSize:
                self.minSize = curSize

            if prevSize < self.minSize:
                self.minSize = prevSize

            if self.its < 4:
                # Perform at least three iterations
                return False

            if prevSize >= curSize or curSize < self.moving_average():
                # We made progress
                return False
            else:
                self.itsNoProgress += 1
                Util.log("No progress, attempt:"
                         " {attempt}".format(attempt=self.itsNoProgress))
                util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)

            if self.its > self.MAX_ITERATIONS:
                max = self.MAX_ITERATIONS
                self.reason = \
                        "Max iterations ({max}) exceeded".format(max=max)
                return True

            if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
                max = self.MAX_ITERATIONS_NO_PROGRESS
                self.reason = \
                        "No progress made for {max} iterations".format(max=max)
                return True

            # Abort if the size bounced too far above the best size achieved,
            # after exhausting the grace allowance.
            maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
            if curSize > maxSizeFromMin:
                self.grace_remaining -= 1
                if self.grace_remaining == 0:
                    self.reason = "Unexpected bump in size," \
                            " compared to minimum achieved"

                    return True

            return False

        def printSizes(self):
            Util.log("Starting size was {size}"
                     .format(size=self.startSize))
            Util.log("Final size was {size}"
                     .format(size=self.finishSize))
            Util.log("Minimum size achieved was {size}"
                     .format(size=self.minSize))

        def printReasoning(self):
            Util.log("Aborted coalesce")
            for hist in self.history:
                Util.log(hist)
            Util.log(self.reason)
            self.printSizes()

        def printSummary(self):
            # Nothing to report if no iteration was recorded.
            if self.its == 0:
                return

            if self.reason:
                Util.log("Aborted coalesce")
                Util.log(self.reason)
            else:
                Util.log("Coalesce summary")

            Util.log(f"Performed {self.its} iterations")
            self.printSizes()

2544 

2545 

    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # snapshot-coalesce repeatedly until the leaf qualifies for a live
        # coalesce; the tracker aborts us if the size stops shrinking
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizeVHD = vdi.getSizeVHD()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)

2561 

2562 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2563 speed = None 

2564 total_time = endTime - startTime 

2565 if total_time > 0: 

2566 speed = float(vhdSize) / float(total_time) 

2567 return speed 

2568 

    def writeSpeedToFile(self, speed):
        """Append a measured speed sample to this SR's speed log, keeping at
        most N_RUNNING_AVERAGE entries (oldest dropped first)."""
        content = []
        speedFile = None
        path = SPEED_LOG_ROOT.format(uuid=self.uuid)
        # Hold the SR lock while reading/rewriting the log file.
        self.lock()
        try:
            Util.log("Writing to file: {myfile}".format(myfile=path))
            lines = ""
            if not os.path.isfile(path):
                lines = str(speed) + "\n"
            else:
                speedFile = open(path, "r+")
                content = speedFile.readlines()
                content.append(str(speed) + "\n")
                if len(content) > N_RUNNING_AVERAGE:
                    del content[0]
                lines = "".join(content)

            # write-then-rename so readers never see a partial file
            util.atomicFileWrite(path, VAR_RUN, lines)
        finally:
            if speedFile is not None:
                speedFile.close()
                Util.log("Closing file: {myfile}".format(myfile=path))
            self.unlock()

2593 

2594 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2595 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2596 if speed is None: 

2597 return 

2598 

2599 self.writeSpeedToFile(speed) 

2600 

2601 def getStorageSpeed(self): 

2602 speedFile = None 

2603 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2604 self.lock() 

2605 try: 

2606 speed = None 

2607 if os.path.isfile(path): 

2608 speedFile = open(path) 

2609 content = speedFile.readlines() 

2610 try: 

2611 content = [float(i) for i in content] 

2612 except ValueError: 

2613 Util.log("Something bad in the speed log:{log}". 

2614 format(log=speedFile.readlines())) 

2615 return speed 

2616 

2617 if len(content): 

2618 speed = sum(content) / float(len(content)) 

2619 if speed <= 0: 2619 ↛ 2621line 2619 didn't jump to line 2621, because the condition on line 2619 was never true

2620 # Defensive, should be impossible. 

2621 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2622 format(speed=speed, uuid=self.uuid)) 

2623 speed = None 

2624 else: 

2625 Util.log("Speed file empty for SR: {uuid}". 

2626 format(uuid=self.uuid)) 

2627 else: 

2628 Util.log("Speed log missing for SR: {uuid}". 

2629 format(uuid=self.uuid)) 

2630 return speed 

2631 finally: 

2632 if not (speedFile is None): 

2633 speedFile.close() 

2634 self.unlock() 

2635 

    def _snapshotCoalesce(self, vdi):
        """One snapshot-coalesce round: snapshot vdi, then coalesce the
        temporary snapshot node into its parent. Returns False if concurrent
        SM operations invalidated the tree."""
        # Note that because we are not holding any locks here, concurrent SM
        # operations may change this tree under our feet. In particular, vdi
        # can be deleted, or it can be snapshotted.
        assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
        Util.log("Single-snapshotting %s" % vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
        try:
            ret = self.xapi.singleSnapshotVDI(vdi)
            Util.log("Single-snapshot returned: %s" % ret)
        except XenAPI.Failure as e:
            if util.isInvalidVDI(e):
                Util.log("The VDI appears to have been concurrently deleted")
                return False
            raise
        self.scanLocked()
        # After the snapshot, vdi's parent is the freshly created temp node.
        tempSnap = vdi.parent
        if not tempSnap.isCoalesceable():
            Util.log("The VDI appears to have been concurrently snapshotted")
            return False
        Util.log("Coalescing parent %s" % tempSnap)
        util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
        # NOTE(review): vhdSize is never used after this assignment; kept as-is
        # in case getSizeVHD() has side effects — verify before removing.
        vhdSize = vdi.getSizeVHD()
        self._coalesce(tempSnap)
        if not vdi.isLeafCoalesceable():
            Util.log("The VDI tree appears to have been altered since")
            return False
        return True

2664 

    def _liveLeafCoalesce(self, vdi) -> bool:
        """Coalesce leaf vdi into its parent while the VDI is paused. Returns
        False if the VDI was deleted or became non-coalesceable meanwhile."""
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            # re-scan under the lock: the tree may have changed
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    self._handleInterruptedCoalesceLeaf()
                    raise
            finally:
                # re-fetch by uuid: the object was replaced by the coalesce
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.ensureUnpaused()
                # delete the renamed old leaf if it is still around
                vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                if vdiOld:
                    util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                    self.deleteVDI(vdiOld)
                    util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
        finally:
            self.cleanup()
            self.unlock()
            self.logFilter.logState()
        return True

2701 

    def _doCoalesceLeaf(self, vdi):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # journal the operation so an interrupted run can be recovered
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceVHD(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename: the parent takes over the leaf's identity
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        if vdi.parent.raw:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW)
        vdi.parent.delConfig(VDI.DB_VHD_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        if parent.parent is None:
            # parent is now a root node: drop the stale parent pointer
            parent.delConfig(VDI.DB_VHD_PARENT)

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)

2767 

2768 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2769 assert(not parent.raw) # raw parents not supported 

2770 extra = child.getSizeVHD() - parent.getSizeVHD() 

2771 if extra < 0: 2771 ↛ 2772line 2771 didn't jump to line 2772, because the condition on line 2771 was never true

2772 extra = 0 

2773 return extra 

2774 

    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Hook run before a leaf-coalesce; no-op in the base SR.

        Subclasses in this file (LVHDSR, LinstorSR) override it to activate
        the chain and (de)inflate volumes.
        """
        pass

2777 

    def _updateNode(self, vdi) -> None:
        """Hook run after the leaf-coalesce rename step; no-op in the base SR
        (LVHDSR overrides it to fix up refcounts)."""
        pass

2780 

    def _finishCoalesceLeaf(self, parent) -> None:
        """Hook run at the end of a leaf-coalesce; no-op in the base SR
        (LVHDSR and LinstorSR override it to re-inflate or deflate the
        remaining parent)."""
        pass

2783 

    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """Hook to refresh slave hosts after a leaf-coalesce undo; no-op in
        the base SR (LVHDSR overrides it)."""
        pass

2786 

    def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
        """Hook to refresh slave hosts after the leaf-coalesce rename; no-op
        in the base SR (LVHDSR overrides it)."""
        pass

2789 

    def _updateSlavesOnResize(self, vdi) -> None:
        """Hook to refresh slave hosts after a volume resize; no-op in the
        base SR (LVHDSR overrides it)."""
        pass

2792 

2793 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2794 for uuid in list(self.vdis.keys()): 

2795 if not uuid in uuidsPresent: 

2796 Util.log("VDI %s disappeared since last scan" % \ 

2797 self.vdis[uuid]) 

2798 del self.vdis[uuid] 

2799 

    def _handleInterruptedCoalesceLeaf(self) -> None:
        """An interrupted leaf-coalesce operation may leave the VHD tree in an
        inconsistent state. If the old-leaf VDI is still present, we revert the
        operation (in case the original error is persistent); otherwise we must
        finish the operation.

        No-op here; each SR subclass in this file provides its own recovery
        based on its journal entries.
        """
        pass

2806 

2807 def _buildTree(self, force): 

2808 self.vdiTrees = [] 

2809 for vdi in self.vdis.values(): 

2810 if vdi.parentUuid: 

2811 parent = self.getVDI(vdi.parentUuid) 

2812 if not parent: 

2813 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2814 self.vdiTrees.append(vdi) 

2815 continue 

2816 if force: 

2817 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2818 (vdi.parentUuid, vdi.uuid)) 

2819 self.vdiTrees.append(vdi) 

2820 continue 

2821 else: 

2822 raise util.SMException("Parent VDI %s of %s not " \ 

2823 "found" % (vdi.parentUuid, vdi.uuid)) 

2824 vdi.parent = parent 

2825 parent.children.append(vdi) 

2826 else: 

2827 self.vdiTrees.append(vdi) 

2828 

2829 

class FileSR(SR):
    """SR subclass for file-based VHD storage (local EXT / NFS mounts)."""

    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        # file SRs are mounted under /var/run/sr-mount/<sr-uuid>
        self.path = "/var/run/sr-mount/%s" % self.uuid
        # journal entries are plain files inside the SR mount
        self.journaler = fjournaler.Journaler(self.path)

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the VHD and RAW files found on disk, then
        rebuild the tree and recover any interrupted leaf-coalesce.

        Raises util.SMException if the SR mount point is missing.
        """
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)
        vhds = self._scan(force)
        for uuid, vhdInfo in vhds.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, False)
                self.vdis[uuid] = vdi
            vdi.load(vhdInfo)
        uuidsPresent = list(vhds.keys())
        # RAW images are not reported by the VHD scan: pick them up separately
        rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, True)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def getFreeSpace(self) -> int:
        """Free bytes on the filesystem backing the SR."""
        return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)

    @override
    def deleteVDIs(self, vdiList) -> None:
        """Delete the given VDIs; if a tree root was removed on an NFS SR,
        mark IntelliCache SRs dirty so stale caches get cleaned up."""
        rootDeleted = False
        for vdi in vdiList:
            if not vdi.parent:
                rootDeleted = True
                break
        SR.deleteVDIs(self, vdiList)
        if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
            self.xapi.markCacheSRsDirty()

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        # cutoff is only consulted when maxAge >= 0 (see below)
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                # parent-node cache: expire by last access time
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # serialize with cache setup: lock on the parent uuid when known
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

    def _cleanupCache(self, uuid, action):
        """Remove a single cache file (honoring `action`); return True if it
        was deleted. Must be called under the cache-setup lock."""
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            # re-check under the lock: caching may have been re-enabled
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            # the cache is still open by a tapdisk
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

    def _isCacheFileName(self, name):
        """True if `name` looks like "<uuid>.vhdcache"."""
        return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
                name.endswith(self.CACHE_FILE_EXT)

    def _scan(self, force):
        """Glob all VHDs in the SR, retrying up to SCAN_RETRY_ATTEMPTS times
        on per-VHD read errors; with `force`, return the last (possibly
        erroneous) result instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD)
            vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid)
            for uuid, vhdInfo in vhds.items():
                if vhdInfo.error:
                    error = True
                    break
            if not error:
                return vhds
            Util.log("Scan error on attempt %d" % i)
        if force:
            return vhds
        raise util.SMException("Scan error")

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no slave host still has it open."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    def _checkSlaves(self, vdi):
        """Ask every host with an attached PBD (other than us) to verify the
        VDI file is not in use; failures from offline hosts are ignored."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except util.CommandException:
                # best effort for hosts the pool believes are offline
                if hostRef in onlineHosts:
                    raise
        
    def _checkSlave(self, hostRef, vdi):
        """Invoke the nfs-on-slave plugin on one host to check the VDI path;
        an exception from call_plugin signals the check failed."""
        call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
        Util.log("Checking with slave: %s" % repr(call))
        _host = self.xapi.session.xenapi.host
        # result text is ignored; failure is reported via exception
        text = _host.call_plugin( * call)

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover from an interrupted leaf-coalesce using the on-disk
        journal: undo it if the parent (or temp-renamed child) file still
        exists, otherwise finish it."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            childName = uuid + vhdutil.FILE_EXTN_VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD
            parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD
            parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: rename parent and child back to
        their original names and restore hidden flags and VDI config."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # the parent may still carry the child's uuid (rename step 2
            # hadn't happened yet)
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # the child may still carry the temporary rename prefix
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # restore visibility: the child is the live leaf again, the parent
        # goes back to being a hidden interior node
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose renames already happened: forget
        the old parent record and refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
            raise util.SMException("VDI %s not found" % childUuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # best effort: the record may already be gone
            Util.logException('_finishInterruptedCoalesceLeaf')
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3069 

3070 

class LVHDSR(SR):
    """SR subclass for LVM-based (LVHD) storage: each VDI is an LV in the
    SR's volume group."""

    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid)
        self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName)

        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        # optional per-SR LVM configuration stored in the SR's other-config
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI, deactivating its LV locally and checking slaves first."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI record in xapi and remove it from the SR metadata LV."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

    @override
    def getFreeSpace(self) -> int:
        """Unallocated bytes in the volume group."""
        stats = lvutil._getVGstats(self.vgName)
        return stats['physical_size'] - stats['physical_utilisation']

    @override
    def cleanup(self):
        """Deactivate any LVs this run activated; log (don't raise) on failure."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

    @override
    def needUpdateBlockInfo(self) -> bool:
        """True if any coalesceable VHD node is missing its cached block
        bitmap (DB_VHD_BLOCKS) in the VDI config."""
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                return True
        return False

    @override
    def updateBlockInfo(self) -> None:
        """Populate the cached block bitmap for any VHD node missing it."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the LVs in the volume group, rebuild the
        tree, and recover any interrupted leaf-coalesce."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVHDVDI(self, uuid,
                        vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    def _scan(self, force):
        """Read VDI info from LVM, retrying up to SCAN_RETRY_ATTEMPTS times on
        per-VDI scan errors; with `force`, return the last result instead of
        raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = lvhdutil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Forget cached VDIs that vanished from the VG, dropping any pending
        activation record for them as well."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and size the LVs before a leaf-coalesce."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

    @override
    def _updateNode(self, vdi) -> None:
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = lvhdutil.NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Leave the remaining parent inflated if it can still be written to,
        otherwise deflate it to save space."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Space needed to inflate the parent LV to full VHD size."""
        return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover from an interrupted leaf-coalesce using the LVM journal:
        undo if the parent (or temp-renamed child) LV still exists, otherwise
        finish the operation."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid
            tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                    self.TMP_RENAME_PREFIX + uuid
            parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid
            parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid
            parentPresent = (self.lvmCache.checkLV(parentLV1) or \
                    self.lvmCache.checkLV(parentLV2))
            if parentPresent or self.lvmCache.checkLV(tmpChildLV):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: rename LVs back, restore VDI
        config, refcounts, inflation state and hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # the parent may still carry the child's uuid (rename step 2
            # hadn't happened yet)
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # the child may still carry the temporary rename prefix
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

            # refcount (best effort - assume that it had succeeded if the
            # second rename succeeded; if not, this adjustment will be wrong,
            # leading to a non-deactivation of the LV)
            ns = lvhdutil.NS_PREFIX_LVM + self.uuid
            cCnt, cBcnt = RefCounter.check(child.uuid, ns)
            pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
            pCnt = pCnt + cBcnt
            RefCounter.set(parent.uuid, pCnt, 0, ns)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        # the parent becomes an interior node again (deflated, hidden,
        # read-only); the child is the live leaf (fully inflated, visible)
        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose renames already happened: inflate
        the surviving node, forget the old parent record, refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # best effort: the record may already be gone
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """Refresh the renamed LVs on every slave that has the child attached."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """Refresh the renamed LV (and clean up the old parent's lock and
        refcount) on every slave that has the VDI attached."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """Refresh the resized LV on every slave with any leaf of this VDI
        attached."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName,
                vdi.fileName, vdi.uuid, slaves)

3394 

3395 

3396class LinstorSR(SR): 

3397 TYPE = SR.TYPE_LINSTOR 

3398 

    def __init__(self, uuid, xapi, createLock, force):
        """Initialize a LINSTOR-backed SR; raises util.SMException when the
        LINSTOR python libraries are not importable on this host."""
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH
        # only the journaler is needed up-front; the full LINSTOR connection
        # is (re)established by _reloadLinstor() at each scan
        self._reloadLinstor(journaler_only=True)

3408 

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no host has the volume open by a
        non-tapdisk process."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

3413 

    @override
    def getFreeSpace(self) -> int:
        # the largest volume size LINSTOR would currently allow us to create
        return self._linstor.max_volume_size_allowed

3417 

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from LINSTOR volume info, rebuild the tree, and
        recover any interrupted leaf-coalesce."""
        all_vdi_info = self._scan(force)
        for uuid, vdiInfo in all_vdi_info.items():
            # When vdiInfo is None, the VDI is RAW.
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LinstorVDI(self, uuid, not vdiInfo)
                self.vdis[uuid] = vdi
            if vdiInfo:
                vdi.load(vdiInfo)
        self._removeStaleVDIs(all_vdi_info.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

3434 

    @override
    def pauseVDIs(self, vdiList) -> None:
        """Pause the given VDIs, first waiting for any LINSTOR volume locks
        to be released (bounded by VOLUME_LOCK_TIMEOUT)."""
        self._linstor.ensure_volume_list_is_not_locked(
            vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorSR, self).pauseVDIs(vdiList)

3441 

    def _reloadLinstor(self, journaler_only=False):
        """(Re)connect to the LINSTOR controller for this SR.

        Always rebuilds self.journaler; unless `journaler_only`, also rebuilds
        self._linstor (with repair=True) and self._vhdutil. Raises
        util.SMException if this host has no PBD for the SR.
        """
        session = self.xapi.session
        host_ref = util.get_this_host_ref(session)
        sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)

        pbd = util.find_my_pbd(session, host_ref, sr_ref)
        if pbd is None:
            raise util.SMException('Failed to find PBD')

        # the LINSTOR resource-group name is stored in the PBD device-config
        dconf = session.xenapi.PBD.get_device_config(pbd)
        group_name = dconf['group-name']

        controller_uri = get_controller_uri()
        self.journaler = LinstorJournaler(
            controller_uri, group_name, logger=util.SMlog
        )

        if journaler_only:
            return

        self._linstor = LinstorVolumeManager(
            controller_uri,
            group_name,
            repair=True,
            logger=util.SMlog
        )
        self._vhdutil = LinstorVhdUtil(session, self._linstor)

3469 

3470 def _scan(self, force): 

3471 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3472 self._reloadLinstor() 

3473 error = False 

3474 try: 

3475 all_vdi_info = self._load_vdi_info() 

3476 for uuid, vdiInfo in all_vdi_info.items(): 

3477 if vdiInfo and vdiInfo.error: 

3478 error = True 

3479 break 

3480 if not error: 

3481 return all_vdi_info 

3482 Util.log('Scan error, retrying ({})'.format(i)) 

3483 except Exception as e: 

3484 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3485 Util.log(traceback.format_exc()) 

3486 

3487 if force: 

3488 return all_vdi_info 

3489 raise util.SMException('Scan error') 

3490 

    def _load_vdi_info(self):
        """Build a map of vdi_uuid -> VHDInfo for all LINSTOR volumes.

        A None value marks a RAW volume. VDIs whose uuid starts with
        'DELETED_' (leftovers of a failed snapshot) are destroyed on the spot
        and excluded from the result. A VDI whose info cannot be loaded gets
        a VHDInfo with error set, so the caller's scan retry logic kicks in.
        """
        all_vdi_info = {}

        # TODO: Ensure metadata contains the right info.

        all_volume_info = self._linstor.get_volumes_with_info()
        volumes_metadata = self._linstor.get_volumes_with_metadata()
        for vdi_uuid, volume_info in all_volume_info.items():
            try:
                volume_metadata = volumes_metadata[vdi_uuid]
                if not volume_info.name and not list(volume_metadata.items()):
                    continue  # Ignore it, probably deleted.

                if vdi_uuid.startswith('DELETED_'):
                    # Assume it's really a RAW volume of a failed snap without VHD header/footer.
                    # We must remove this VDI now without adding it in the VDI list.
                    # Otherwise `Relinking` calls and other actions can be launched on it.
                    # We don't want that...
                    Util.log('Deleting bad VDI {}'.format(vdi_uuid))

                    self.lock()
                    try:
                        self._linstor.destroy_volume(vdi_uuid)
                        try:
                            self.forgetVDI(vdi_uuid)
                        except:
                            # best effort: the xapi record may not exist
                            pass
                    except Exception as e:
                        Util.log('Cannot delete bad VDI: {}'.format(e))
                    finally:
                        self.unlock()
                    continue

                vdi_type = volume_metadata.get(VDI_TYPE_TAG)
                volume_name = self._linstor.get_volume_name(vdi_uuid)
                if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
                    # Always RAW!
                    info = None
                elif vdi_type == vhdutil.VDI_TYPE_VHD:
                    info = self._vhdutil.get_vhd_info(vdi_uuid)
                else:
                    # Ensure it's not a VHD...
                    try:
                        info = self._vhdutil.get_vhd_info(vdi_uuid)
                    except:
                        # maybe a damaged VHD: attempt a repair, then re-read;
                        # if that fails too, treat the volume as RAW
                        try:
                            self._vhdutil.force_repair(
                                self._linstor.get_device_path(vdi_uuid)
                            )
                            info = self._vhdutil.get_vhd_info(vdi_uuid)
                        except:
                            info = None

            except Exception as e:
                Util.log(
                    ' [VDI {}: failed to load VDI info]: {}'
                    .format(vdi_uuid, e)
                )
                # flag the entry as erroneous so the scan retries
                info = vhdutil.VHDInfo(vdi_uuid)
                info.error = 1

            all_vdi_info[vdi_uuid] = info

        return all_vdi_info

3555 

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and size the volumes before a leaf-coalesce."""
        vdi._activateChain()
        vdi.deflate()
        vdi._inflateParentForCoalesce()

3561 

3562 @override 

3563 def _finishCoalesceLeaf(self, parent) -> None: 

3564 if not parent.isSnapshot() or parent.isAttachedRW(): 

3565 parent.inflateFully() 

3566 else: 

3567 parent.deflate() 

3568 

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        # bytes needed to grow the parent's DRBD volume to its fully-inflated
        # size for the given virtual size / vdi type
        return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize()

3572 

3573 def _hasValidDevicePath(self, uuid): 

3574 try: 

3575 self._linstor.get_device_path(uuid) 

3576 except Exception: 

3577 # TODO: Maybe log exception. 

3578 return False 

3579 return True 

3580 

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """Run the base live leaf-coalesce under the SR lock, after waiting
        for any LINSTOR lock on the volume to clear."""
        self.lock()
        try:
            self._linstor.ensure_volume_is_not_locked(
                vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
            )
            return super(LinstorSR, self)._liveLeafCoalesce(vdi)
        finally:
            self.unlock()

3591 

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover from an interrupted leaf-coalesce using the LINSTOR
        journal: undo if the parent (or temp-renamed child) volume still has
        a device path, otherwise finish the operation."""
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

3605 

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: rename parent and child volumes
        back to their original names and restore VDI config and hidden flags."""
        Util.log('*** UNDO LEAF-COALESCE')
        parent = self.getVDI(parentUuid)
        if not parent:
            # the parent may still carry the child's uuid (rename step 2
            # hadn't happened yet)
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        child = self.getVDI(childUuid)
        if not child:
            # the child may still carry the temporary rename prefix
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)

        # TODO: Maybe deflate here.

        # restore visibility: the child is the live leaf again, the parent
        # goes back to being a hidden interior node
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

3643 

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce that had passed the point of no return.

        The child already holds the coalesced data; all that remains is to
        forget the now-gone parent from XAPI (best effort) and notify
        slaves of the size change.
        """
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Parent record may already be gone — best effort.
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

3656 

3657 def _checkSlaves(self, vdi): 

3658 try: 

3659 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3660 for openers in all_openers.values(): 

3661 for opener in openers.values(): 

3662 if opener['process-name'] != 'tapdisk': 

3663 raise util.SMException( 

3664 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3665 ) 

3666 except LinstorVolumeManagerError as e: 

3667 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3668 raise 

3669 

3670 

3671################################################################################ 

3672# 

3673# Helpers 

3674# 

def daemonize():
    """Detach from the caller using the classic double-fork technique.

    Returns False in the original (calling) process and True in the final
    daemon process; the intermediate child exits immediately. The daemon
    also detaches from the controlling terminal, re-points the standard
    fds at /dev/null and clears inherited lock objects.
    """
    pid = os.fork()
    if pid:
        # First parent: reap the intermediate child and keep running.
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    os.chdir("/")
    os.setsid()
    # Second fork so the daemon can never reacquire a controlling terminal.
    pid = os.fork()
    if pid:
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True

3699 

3700 

def normalizeType(type):
    """Map a concrete SR driver type onto one of the generic SR.TYPES.

    Raises util.SMException for any type that does not normalize to a
    supported one.
    """
    if type in LVHDSR.SUBTYPES:
        type = SR.TYPE_LVHD
    if type in ("lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"):
        # temporary while LVHD is symlinked as LVM
        type = SR.TYPE_LVHD
    file_backed = (
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock",
    )
    if type in file_backed:
        type = SR.TYPE_FILE
    if type == "linstor":
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type

3717 

# Length (seconds) of the quiet period the GC observes before starting work
# on an SR: 5 minutes.
GCPAUSE_DEFAULT_SLEEP = 5 * 60

3719 

3720 

def _gc_init_file(sr_uuid):
    """Return the path of the per-SR 'gc_init' marker file.

    The marker records that at least one GC pass has run for this SR since
    boot (it lives under the non-persistent directory).
    """
    return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')

3723 

3724 

def _create_init_file(sr_uuid):
    """Create (or truncate) the per-SR 'gc_init' marker file.

    Fix: the marker path was rebuilt inline, duplicating _gc_init_file()
    and risking the two drifting apart — reuse the helper instead. The
    file is only written, never read back here, so plain 'w' mode is used.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    with open(_gc_init_file(sr_uuid), 'w') as f:
        f.write('1')

3730 

3731 

def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Observe the GC quiet period before working on an SR.

    Skipped entirely when `immediate` is set. If the GCPAUSE fist point is
    active it stands in for the pause; otherwise, once the SR's 'gc_init'
    marker exists, sleep GCPAUSE_DEFAULT_SLEEP seconds in an abortable way.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.
    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
        util.fistpoint.activate_custom_fn(
            util.GCPAUSE_FISTPOINT, lambda *args: None)
        return

    if not os.path.exists(_gc_init_file(sr.uuid)):
        return

    def _abort_requested():
        return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

    # If time.sleep hangs we are in deep trouble, however for completeness
    # we set the timeout of the abort thread to 110% of
    # GCPAUSE_DEFAULT_SLEEP.
    Util.log("GC active, about to go quiet")
    Util.runAbortable(
        lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
        None, sr.uuid, _abort_requested, VDI.POLL_INTERVAL,
        GCPAUSE_DEFAULT_SLEEP * 1.1)
    Util.log("GC active, quiet period ended")

3755 

3756 

def _gcLoop(sr, dryRun=False, immediate=False):
    """Main GC work loop for one SR.

    Holds lockGCActive for the duration of the run; each iteration of the
    inner loop additionally holds lockGCRunning while it garbage-collects
    and performs at most one coalesce. Loops until there is no work left,
    the SR is unplugged, GC is disabled, or SIGTERM is received.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        Util.log("SR no longer attached, exiting")
        # NOTE(review): lockGCActive is still held on this early return;
        # presumably the lock is dropped when the GC process exits —
        # confirm an explicit release is not needed here.
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        # Observe the quiet period (no-op when immediate=True).
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                # First collect all garbage VDIs under the SR lock.
                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # Then do at most one (non-leaf) coalesce per iteration and
                # rescan before deciding on more work.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                # Fall back to leaf-coalesce only when no regular
                # coalesce candidate exists.
                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

            finally:
                lockGCRunning.release()
    except:
        task_status = "failure"
        raise
    finally:
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()

3850 

3851 

def _gc(session, srUuid, dryRun=False, immediate=False):
    """Run the GC loop for one SR, with guaranteed cleanup afterwards."""
    init(srUuid)
    sr = SR.getInstance(srUuid, session)
    if sr.gcEnabled(False):
        try:
            _gcLoop(sr, dryRun, immediate=immediate)
        finally:
            # Always report no-space candidates, clean up and drop the
            # XAPI session reference, even when the loop raised.
            sr.check_no_space_candidates()
            sr.cleanup()
            sr.logFilter.logState()
            del sr.xapi

3865 

3866 

def _abort(srUuid, soft=False):
    """Aborts a GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        # set() returns False only for soft=True with a signal already
        # pending — someone else is already aborting.
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        # Give the running instance time to notice the flag and bail out.
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True

3895 

3896 

def init(srUuid):
    """Lazily create the module-level GC locks for this SR."""
    global lockGCRunning, lockGCActive
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)

3904 

3905 

class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """

    def __init__(self, srUuid):
        self._gcLock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock without blocking on it.

        The SR lock is held only for the duration of the attempt, which
        serializes acquisition against SR-lock holders.
        """
        self._srLock.acquire()
        try:
            return self._gcLock.acquireNoblock()
        finally:
            self._srLock.release()

    def release(self):
        """Drop the GC-active lock (the SR lock is not involved)."""
        self._gcLock.release()

3925 

3926 

def usage():
    """Print CLI usage help to stdout, log the invalid invocation and exit
    with status 1 (never returns)."""
    output = """Garbage collect and/or coalesce VHDs in a VHD-based SR

Parameters:
    -u --uuid UUID     SR UUID
 and one of:
    -g --gc            garbage collect, coalesce, and repeat while there is work
    -G --gc_force      garbage collect once, aborting any current operations
    -c --cache-clean <max_age>  clean up IntelliCache cache files older than
                       max_age hours
    -a --abort         abort any currently running operation (GC or coalesce)
    -q --query         query the current state (GC'ing, coalescing or not running)
    -x --disable       disable GC/coalesce (will be in effect until you exit)
    -t --debug         see Debug below

Options:
    -b --background    run in background (return immediately) (valid for -g only)
    -f --force         continue in the presence of VHDs with errors (when doing
                       GC, this might cause removal of any such VHDs) (only valid
                       for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes.  ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd>   <cmd> is one of "activate", "deactivate", "inflate",
                       "deflate".
    -v --vdi_uuid      VDI UUID
    """
    #-d --dry-run       don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)

3960 

3961 

3962############################################################################## 

3963# 

3964# API 

3965# 

def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
    """
    if not _abort(srUuid, soft):
        # soft abort requested while one is already pending: nothing to do.
        return False
    # _abort() returned True holding lockGCActive; stop the service and
    # release the lock before reporting success.
    stop_gc_service(srUuid)
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True

3976 

3977 

def run_gc(session, srUuid, dryRun, immediate=False):
    """Run one GC pass and translate the outcome into an exit code.

    Returns 0 on success, 2 when the run was aborted (AbortException),
    and 1 on any other error, which is logged rather than propagated.
    """
    try:
        _gc(session, srUuid, dryRun, immediate=immediate)
        return 0
    except AbortException:
        Util.log("Aborted")
        return 2
    except Exception:
        Util.logException("gc")
        Util.log("* * * * * SR %s: ERROR\n" % srUuid)
        return 1

3989 

3990 

def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    # Install the SIGTERM handler so a service stop can interrupt the loop.
    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        # daemonize() returns True only in the detached daemon process;
        # the parent returns to the caller immediately.
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            run_gc(None, srUuid, dryRun)
            os._exit(0)
    else:
        # Foreground: run synchronously and exit with the GC's status code.
        os._exit(run_gc(session, srUuid, dryRun, immediate=True))

4020 

4021 

def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if not lockRunning.acquireNoblock():
        if should_preempt(session, sr_uuid):
            # The VDI being coalesced has become garbage: abort the current
            # run (softly) so the new one can collect it.
            util.SMlog("Aborting currently-running coalesce of garbage VDI")
            try:
                if not abort(sr_uuid, soft=True):
                    util.SMlog("The GC has already been scheduled to re-start")
            except util.CommandException as e:
                if e.code != errno.ETIMEDOUT:
                    raise
                util.SMlog('failed to abort the GC')
        else:
            util.SMlog("A GC instance already running, not kicking")
            return
    else:
        # We only probed for a running instance; drop the lock again so the
        # forked GC process can take it itself.
        lockRunning.release()

    util.SMlog(f"Starting GC file is {__file__}")
    # Re-invoke this script in background mode (-b) for the given SR.
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4050 

def _gc_service_cmd(sr_uuid, action, extra_args=None):
    """
    Build and run the systemctl command for the GC service using util.doexec.
    """
    # systemd escapes '-' in template instance names as \x2d.
    escaped_uuid = sr_uuid.replace("-", "\\x2d")
    command = ["/usr/bin/systemctl", "--quiet"]
    if extra_args:
        command.extend(extra_args)
    command.append(action)
    command.append(f"SMGC@{escaped_uuid}")
    return util.doexec(command)

4061 

4062 

def start_gc_service(sr_uuid, wait=False):
    """
    This starts the templated systemd service which runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called with
    wait=True this will run the service synchronously and will not return until the
    run has finished. This is used to force a run of the GC instead of just kicking it
    in the background.
    """
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    if wait:
        extra = None
    else:
        # --no-block returns as soon as the job is queued.
        extra = ["--no-block"]
    _gc_service_cmd(sr_uuid, "start", extra_args=extra)

4075 

4076 

def stop_gc_service(sr_uuid):
    """
    Stops the templated systemd service which runs GC on the given SR UUID.
    """
    util.SMlog(f"Stopping SMGC@{sr_uuid}...")
    rc, _stdout, stderr = _gc_service_cmd(sr_uuid, "stop")
    if rc == 0:
        return
    # Best effort: log and carry on if systemctl failed.
    util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`")

4085 

4086 

def wait_for_completion(sr_uuid):
    """Block until the SMGC systemd unit for this SR is no longer active.

    Polls get_state() every 5 seconds.
    """
    while get_state(sr_uuid):
        time.sleep(5)

4090 

4091 

def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # Another instance is active: abort it. NOTE(review): abort() takes
        # and then releases lockGCActive itself, so on this path we proceed
        # without holding the lock — confirm that is intended.
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are VHD errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        sr.cleanup()
        sr.logFilter.logState()
        # NOTE(review): on the abort() path lockGCActive is not held here,
        # yet it is released — presumably the lock implementation tolerates
        # an unpaired release; verify.
        lockGCActive.release()

4121 

4122 

def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd for
    the state of the templated SMGC service and will return True if it is "activating"
    or "running" (for completeness, as in practice it will never achieve the latter state)
    """
    # systemd escapes '-' in template instance names as \x2d.
    escaped_uuid = srUuid.replace("-", "\\x2d")
    proc = subprocess.run(
        ["/usr/bin/systemctl", "is-active", f"SMGC@{escaped_uuid}"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = proc.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")

4135 

4136 

def should_preempt(session, srUuid):
    """Return True when the VDI currently being coalesced has itself become
    garbage, i.e. the running coalesce is wasted work and may be aborted.

    Raises util.SMException if more than one coalesce journal entry exists.
    """
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if not entries:
        return False
    if len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())

4151 

4152 

def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of `vdiUuids` that are leaf-coalesceable.

    Raises util.SMException if any of the given VDIs cannot be found.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    result = []
    for vdi_uuid in vdiUuids:
        vdi = sr.getVDI(vdi_uuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % vdi_uuid)
        if vdi.isLeafCoalesceable():
            result.append(vdi_uuid)
    return result

4164 

4165 

def cache_cleanup(session, srUuid, maxAge):
    """Remove IntelliCache files older than `maxAge` hours on the given SR."""
    return SR.getInstance(srUuid, session).cleanupCache(maxAge)

4169 

4170 

def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug command against one VDI of an LVHD SR.

    `cmd` is one of "activate", "deactivate", "inflate", "deflate"
    (anything else just prints the before/after state). The VDI state is
    printed before and after; the SR is rescanned at the end.
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVHDSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the original printed the literal "%s" because the format
        # argument was missing.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = lvhdutil.NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)

4199 

def abort_optional_reenable(uuid):
    """Disable GC/coalesce on the SR until the operator presses enter.

    Issues an abort (which, when it returns True, leaves lockGCActive
    held) and keeps GC disabled for as long as this process waits on
    stdin; releasing the locks re-enables it.
    """
    print("Disabling GC/coalesce for %s" % uuid)
    ret = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    # NOTE(review): lockGCRunning is released here although this function
    # never visibly acquires it — confirm this unpaired release is
    # intentional and tolerated by the lock implementation.
    lockGCRunning.release()
    if ret:
        lockGCActive.release()

4208 

4209 

4210############################################################################## 

4211# 

4212# CLI 

4213# 

def main():
    """Parse the command line and dispatch the requested GC action."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    for o, a in opts:
        # Each option matches at most one branch, so this elif chain is
        # equivalent to the original if-cascade.
        if o in ("-g", "--gc"):
            action = "gc"
        elif o in ("-G", "--gc_force"):
            action = "gc_force"
        elif o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        elif o in ("-a", "--abort"):
            action = "abort"
        elif o in ("-q", "--query"):
            action = "query"
        elif o in ("-x", "--disable"):
            action = "disable"
        elif o in ("-u", "--uuid"):
            uuid = a
        elif o in ("-b", "--background"):
            background = True
        elif o in ("-f", "--force"):
            force = True
        elif o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        elif o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        elif o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    if not action or not uuid:
        usage()
    # --debug requires both a command and a VDI UUID; other actions
    # must not supply either.
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action not in ("query", "debug"):
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)

4284 

# Script entry point: parse CLI arguments and run the requested GC action.
if __name__ == '__main__':
    main()