Skip to content

Commit 038d06c

Browse files
author
gitlab
committed
Merge branch 'fix-35297-master@@2' into 'master'
[BugFix: ZSTACK-35297] fix checking host liveness. See merge request zstackio/zstack-utility!337.
2 parents 0f508cb + 919b90d commit 038d06c

File tree

1 file changed

+36
-21
lines changed

1 file changed

+36
-21
lines changed

kvmagent/kvmagent/plugins/ha_plugin.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,13 @@ def __init__(self, record):
5959
self.timestamp = int(ts)
6060

6161
for line in lines[1:]:
62-
k, v = line.strip().split('=', 2)
63-
if k == 'io_timeout': self.io_timeout = int(v)
64-
elif k == 'last_check': self.last_check = int(v)
65-
elif k == 'last_live': self.last_live = int(v)
62+
try:
63+
k, v = line.strip().split('=', 2)
64+
if k == 'io_timeout': self.io_timeout = int(v)
65+
elif k == 'last_check': self.last_check = int(v)
66+
elif k == 'last_live': self.last_live = int(v)
67+
except ValueError:
68+
logger.warn("unexpected sanlock status: %s" % line)
6669

6770
if not all([self.io_timeout, self.last_check, self.last_live]):
6871
raise Exception('unexpected sanlock host status: ' + record)
@@ -683,34 +686,46 @@ def scan_host(self, req):
683686

684687
@kvmagent.replyerror
685688
def sanlock_scan_host(self, req):
689+
def parseLockspaceHostIdPair(s):
690+
xs = s.split(':', 3)
691+
return xs[0].split()[-1], int(xs[1])
692+
693+
def check_host_status(myHostId, lkspc, hostIds):
694+
hstatus = shell.call("timeout 5 sanlock client host_status -s %s -D" % lkspc)
695+
parser = SanlockHostStatusParser(hstatus)
696+
697+
result = {}
698+
if not parser.is_alive(myHostId):
699+
logger.info("[SANLOCK] current node has no LIVE records for lockspace: %s" % lkspc)
700+
return result
701+
702+
for target in cmd.hostIds:
703+
hostId, psUuid = target.hostId, target.psUuid
704+
if psUuid not in lkspc: continue
705+
706+
timed_out = parser.is_timed_out(hostId)
707+
if timed_out is not None:
708+
result[psUuid + '_' + str(hostId)] = not timed_out
709+
return result
710+
686711
rsp = SanlockScanRsp()
687712
cmd = jsonobject.loads(req[http.REQUEST_BODY])
688713
cstatus = shell.call("timeout 5 sanlock client gets -h 1")
689714
logger.info("[SANLOCK] reports client status:\n" + cstatus)
690-
myIds = [ int(line.split(':', 3)[1]) for line in filter(lambda x: x.startswith('s'), cstatus.splitlines()) ]
715+
pairs = [ parseLockspaceHostIdPair(line) for line in filter(lambda x: x.startswith('s'), cstatus.splitlines()) ]
691716

692-
if len(myIds) == 0:
717+
if len(pairs) == 0:
693718
logger.info("[SANLOCK] host id not found")
694719
return jsonobject.dumps(rsp)
695720

696-
hstatus = shell.call("timeout 5 sanlock client host_status -D")
697-
parser = SanlockHostStatusParser(hstatus)
698-
699-
is_alive = False
700-
for hostId in myIds:
701-
is_alive = parser.is_alive(hostId)
702-
if is_alive: break
721+
result = {}
722+
for lkspc, hid in pairs:
723+
res = check_host_status(hid, lkspc, cmd.hostIds)
724+
result.update(res)
703725

704-
if not is_alive:
705-
logger.info("[SANLOCK] current node has no LIVE records")
726+
if len(result) == 0:
706727
return jsonobject.dumps(rsp)
707728

708-
result = {}
709-
for hostId in cmd.hostIds:
710-
timed_out = parser.is_timed_out(hostId)
711-
if timed_out is not None:
712-
result[str(hostId)] = not timed_out
713-
714729
rsp.result = result
715730
return jsonobject.dumps(rsp)
716731

0 commit comments

Comments
 (0)