fix: improve connection stability and add connection check API
- Fix race condition in resource cleanup during disconnect/cancel
- Trigger reconnect on ping send failure
- Trigger reconnect on write operation failure
- Add isConnected and connectionState properties
- Add checkConnection() method for active connection testing
- Add ensureConnected() method for proactive connection recovery
This commit is contained in:
@@ -407,12 +407,25 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
}
|
||||
} onCancel: {
|
||||
logger.debug("Connection task cancelled")
|
||||
// Clean up resources
|
||||
if let channel = self.channel {
|
||||
channel.close(mode: .all, promise: nil)
|
||||
self.channel = nil
|
||||
}
|
||||
// Clean up resources safely to avoid race conditions
|
||||
// Capture references first to avoid concurrent access issues
|
||||
let channelToClose = self.channel
|
||||
let bufferToRelease = self.batchBuffer
|
||||
|
||||
// Clear references first to prevent other threads from using them
|
||||
self.channel = nil
|
||||
self.batchBuffer = nil
|
||||
|
||||
// Close channel asynchronously after clearing references
|
||||
// This ensures BatchBuffer's deinit won't conflict with channel close
|
||||
if let channel = channelToClose {
|
||||
channel.eventLoop.execute {
|
||||
channel.close(mode: .all, promise: nil)
|
||||
}
|
||||
}
|
||||
|
||||
// bufferToRelease will be released here after channel close is scheduled
|
||||
_ = bufferToRelease
|
||||
|
||||
let continuationToResume: CheckedContinuation<ServerInfo, Error>? = self
|
||||
.serverInfoContinuation.withLockedValue { cont in
|
||||
@@ -551,12 +564,25 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
}
|
||||
} onCancel: {
|
||||
logger.debug("Client connect initialization cancelled")
|
||||
// Clean up resources
|
||||
if let channel = self.channel {
|
||||
channel.close(mode: .all, promise: nil)
|
||||
self.channel = nil
|
||||
}
|
||||
// Clean up resources safely to avoid race conditions
|
||||
// Capture references first to avoid concurrent access issues
|
||||
let channelToClose = self.channel
|
||||
let bufferToRelease = self.batchBuffer
|
||||
|
||||
// Clear references first to prevent other threads from using them
|
||||
self.channel = nil
|
||||
self.batchBuffer = nil
|
||||
|
||||
// Close channel asynchronously after clearing references
|
||||
// This ensures BatchBuffer's deinit won't conflict with channel close
|
||||
if let channel = channelToClose {
|
||||
channel.eventLoop.execute {
|
||||
channel.close(mode: .all, promise: nil)
|
||||
}
|
||||
}
|
||||
|
||||
// bufferToRelease will be released here after channel close is scheduled
|
||||
_ = bufferToRelease
|
||||
|
||||
let continuationToResume: CheckedContinuation<Void, Error>? = self
|
||||
.connectionEstablishedContinuation.withLockedValue { cont in
|
||||
@@ -738,6 +764,13 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
/// Tears down the current connection.
///
/// Steps, in order: cancels `pingTask` (if any), calls `clearPendingPings()`,
/// drops this object's reference to `batchBuffer`, and then, if `channel` is
/// non-nil, closes it and waits for the close to complete.
///
/// - Throws: Whatever error the awaited channel close produces.
/// - Note: `self.channel` is not set to `nil` by this method.
private func disconnect() async throws {
|
||||
// Stop the ping task first; optional chaining makes this a no-op when no task is set.
self.pingTask?.cancel()
|
||||
clearPendingPings() // Clear pending pings to avoid promise leaks
|
||||
|
||||
// Detach batchBuffer from self before closing the channel.
|
||||
// Intended to prevent race conditions during deallocation.
// NOTE(review): this only clears the stored reference; whether that is sufficient
// to avoid the race depends on how BatchBuffer is used elsewhere — confirm.
|
||||
let bufferToRelease = self.batchBuffer
|
||||
self.batchBuffer = nil
|
||||
// NOTE(review): this statement only reads the local; the exact point at which the
// buffer is released is decided by ARC — confirm this line is actually needed.
_ = bufferToRelease // Release after clearing reference
|
||||
|
||||
// When `channel` is nil the optional chain short-circuits and nothing is awaited.
try await self.channel?.close().get()
|
||||
}
|
||||
|
||||
@@ -819,6 +852,11 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
logger.debug("sent ping: \(pingsOut)")
|
||||
} catch {
|
||||
logger.error("Unable to send ping: \(error)")
|
||||
// Trigger reconnect on ping failure - connection may be broken
|
||||
let currentState = state.withLockedValue { $0 }
|
||||
if currentState == .connected {
|
||||
handleDisconnect()
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -895,6 +933,12 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
|
||||
func handleDisconnect() {
|
||||
state.withLockedValue { $0 = .disconnected }
|
||||
|
||||
// Safely clear batchBuffer first to avoid race conditions
|
||||
let bufferToRelease = self.batchBuffer
|
||||
self.batchBuffer = nil
|
||||
_ = bufferToRelease // Release after clearing reference
|
||||
|
||||
if let channel = self.channel {
|
||||
let promise = channel.eventLoop.makePromise(of: Void.self)
|
||||
Task {
|
||||
@@ -915,10 +959,13 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
} catch {
|
||||
logger.error("Error closing connection: \(error)")
|
||||
}
|
||||
// Only start reconnect after disconnect is complete
|
||||
self.handleReconnect()
|
||||
}
|
||||
} else {
|
||||
// No channel, start reconnect immediately
|
||||
handleReconnect()
|
||||
}
|
||||
|
||||
handleReconnect()
|
||||
}
|
||||
|
||||
func handleReconnect() {
|
||||
@@ -979,6 +1026,12 @@ class ConnectionHandler: ChannelInboundHandler {
|
||||
do {
|
||||
try await buffer.writeMessage(operation)
|
||||
} catch {
|
||||
// Trigger reconnect on write failure - connection may be broken
|
||||
let currentState = state.withLockedValue { $0 }
|
||||
if currentState == .connected {
|
||||
logger.error("Write operation failed, triggering reconnect: \(error)")
|
||||
handleDisconnect()
|
||||
}
|
||||
throw NatsError.ClientError.io(error)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user