Skip to content

Commit 9199888

Browse files
committed
[core] Improve NetworkHandler exception handling and optimize exception logging:
- close #2175 - close #2148 - Fix `Throwable.findCause` - Rename `mirai.network.handle.selector.logging` to `mirai.network.handler.selector.logging` - Hide rather long and complicated `CancellationException`s, use `mirai.network.handler.cancellation.trace` to re-enable it
1 parent 88efa14 commit 9199888

28 files changed

+523
-253
lines changed

docs/DebuggingNetwork.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
|:----------------------------------------------------|:---------------------------------|:---------------------------------------------------------------------------------------|
1515
| `mirai.network.handler.selector.max.attempts` | `[1, 2147483647]` | 最大重连尝试次数 |
1616
| `mirai.network.reconnect.delay` | `[0, 9223372036854775807]` | 两次重连尝试的间隔毫秒数 |
17-
| `mirai.network.handle.selector.logging` | `true`/`false` | 启用执行重连时的详细日志 |
17+
| `mirai.network.handler.selector.logging` | `true`/`false` | 启用执行重连时的详细日志 |
18+
| `mirai.network.handler.cancellation.trace` | `true`/`false` | 让网络层的异常时包含详细原因 |
1819
| `mirai.network.state.observer.logging` | `true`/`on`/`false`/`off`/`full` | 启用网络层状态变更的日志 |
1920
| `mirai.event.launch.undispatched` | `true`/`false` | 详见 [源码内注释][launch-undispatched] |
2021
| `mirai.resource.creation.stack.enabled` | `true`/`false` | 启用 `ExternalResource` 创建时的 stacktrace 记录 (影响性能), 在资源泄露时展示 |
@@ -31,5 +32,5 @@
3132

3233
修改示例:
3334

34-
在启动 JVM 时添加参数 `-Dmirai.network.handle.selector.logging=true`
35+
在启动 JVM 时添加参数 `-Dmirai.network.handler.selector.logging=true`
3536
则启用执行重连时的详细日志

mirai-core-api/src/commonMain/kotlin/network/LoginFailedException.kt

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ public sealed class LoginFailedException(
2727
message: String? = null,
2828
cause: Throwable? = null
2929
) : RuntimeException(message, cause)
30+
// 实现提示 (仅供网络层实现者参考): `LoginFailedException` 会被包装为 `NetworkException` (`LoginFailedExceptionAsNetworkException`),
31+
// 并在 `bot.login` 时 unwrap.
3032

3133
/**
3234
* 密码输入错误 (有时候也会是其他错误, 如 `"当前上网环境异常,请更换网络环境或在常用设备上登录或稍后再试。"`)

mirai-core-utils/src/androidMain/kotlin/Actuals.kt

+12-7
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,18 @@ internal class StacktraceException(override val message: String?, private val st
3030
override fun getStackTrace(): Array<StackTraceElement> = stacktrace
3131
}
3232

33-
public actual inline fun <reified E> Throwable.unwrap(): Throwable {
33+
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
3434
if (this !is E) return this
35-
val e = StacktraceException("Unwrapped exception: $this", this.stackTrace)
36-
for (throwable in this.suppressed) {
37-
e.addSuppressed(throwable)
35+
return if (addSuppressed) {
36+
val e = StacktraceException("Unwrapped exception: $this", this.stackTrace)
37+
for (throwable in this.suppressed) {
38+
e.addSuppressed(throwable)
39+
}
40+
this.findCause { it !is E }
41+
?.also { it.addSuppressed(e) }
42+
?: this
43+
} else {
44+
this.findCause { it !is E }
45+
?: this
3846
}
39-
return this.findCause { it !is E }
40-
?.also { it.addSuppressed(e) }
41-
?: this
4247
}

mirai-core-utils/src/commonMain/kotlin/CoroutineUtils.kt

+3-15
Original file line numberDiff line numberDiff line change
@@ -118,20 +118,8 @@ public fun CoroutineContext.childScopeContext(
118118
else it
119119
}
120120

121-
public inline fun <R> runUnwrapCancellationException(block: () -> R): R {
122-
try {
123-
return block()
124-
} catch (e: CancellationException) {
125-
// e is like `Exception in thread "main" kotlinx.coroutines.JobCancellationException: Parent job is Cancelling; job=JobImpl{Cancelled}@f252f300`
126-
// and this is useless.
127-
throw e.unwrapCancellationException()
128-
129-
// if (e.suppressed.isNotEmpty()) throw e // preserve details.
130-
// throw e.findCause { it !is CancellationException } ?: e
131-
}
132-
}
133-
134-
public fun Throwable.unwrapCancellationException(): Throwable = unwrap<CancellationException>()
121+
public fun Throwable.unwrapCancellationException(addSuppressed: Boolean = true): Throwable =
122+
unwrap<CancellationException>(addSuppressed)
135123

136124
/**
137125
* For code
@@ -178,6 +166,6 @@ public fun Throwable.unwrapCancellationException(): Throwable = unwrap<Cancellat
178166
* ```
179167
*/
180168
@Suppress("unused")
181-
public expect inline fun <reified E> Throwable.unwrap(): Throwable
169+
public expect inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean = true): Throwable
182170

183171
public val CoroutineContext.coroutineName: String get() = this[CoroutineName]?.name ?: "unnamed"

mirai-core-utils/src/commonMain/kotlin/StandardUtils.kt

+8-5
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,15 @@ public fun Throwable.causes(maxDepth: Int = 20): Sequence<Throwable> = sequence
8383

8484
public inline fun Throwable.findCause(maxDepth: Int = 20, filter: (Throwable) -> Boolean): Throwable? {
8585
var depth = 0
86-
var rootCause: Throwable? = this
86+
var curr: Throwable? = this
8787
while (true) {
88-
if (rootCause?.cause === rootCause) return rootCause
89-
val current = rootCause?.cause ?: return null
90-
if (filter(current)) return current
91-
rootCause = rootCause.cause
88+
if (curr == null) return null
89+
val cause = curr.cause ?: return null
90+
if (filter(cause)) return cause
91+
92+
if (curr.cause === curr) return null // circular reference
93+
curr = curr.cause
94+
9295
if (depth++ >= maxDepth) return null
9396
}
9497
}

mirai-core-utils/src/jvmMain/kotlin/Actuals.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ public actual fun String.decodeBase64(): ByteArray {
2323
return Base64.getDecoder().decode(this)
2424
}
2525

26-
public actual inline fun <reified E> Throwable.unwrap(): Throwable {
26+
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
2727
if (this !is E) return this
2828
return this.findCause { it !is E }
29-
?.also { it.addSuppressed(this) }
29+
?.also { if (addSuppressed) it.addSuppressed(this) }
3030
?: this
3131
}

mirai-core-utils/src/nativeMain/kotlin/CoroutineUtils.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ public actual suspend inline fun <T, R> T.runBIO(crossinline block: T.() -> R):
6464
* ```
6565
*/
6666
@Suppress("unused")
67-
public actual inline fun <reified E> Throwable.unwrap(): Throwable {
67+
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
6868
if (this !is E) return this
6969
return this.findCause { it !is E }
70-
?.also { it.addSuppressed(this) }
70+
?.also { if (addSuppressed) it.addSuppressed(this) }
7171
?: this
7272
}

mirai-core/src/commonMain/kotlin/AbstractBot.kt

+10-5
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,18 @@ internal abstract class AbstractBot constructor(
135135
try {
136136
network.resumeConnection()
137137
} catch (e: Throwable) { // failed to init
138-
val cause = e.unwrap<NetworkException>()
138+
// lift cause to the top of the exception chain. e.g. LoginFailedException
139+
val cause = if (e is NetworkException) {
140+
e.unwrapForPublicApi()
141+
} else e
142+
143+
// close bot if it hadn't been done during `resumeConnection()`
139144
if (!components[SsoProcessor].firstLoginSucceed) {
140-
this.close(cause) // failed to do first login.
141-
}
142-
if (cause is LoginFailedException && cause.killBot) {
143-
close(cause)
145+
close(cause) // failed to do first login, close bot
146+
} else if (cause is LoginFailedException && cause.killBot) {
147+
close(cause) // re-login failed and has caused bot being somehow killed by server
144148
}
149+
145150
throw cause
146151
}
147152
logger.info { "Bot login successful." }

mirai-core/src/commonMain/kotlin/network/components/BotOfflineEventMonitor.kt

+13-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import net.mamoe.mirai.internal.network.handler.selector.NetworkException
2323
import net.mamoe.mirai.utils.*
2424

2525
/**
26-
* Handles [BotOfflineEvent]
26+
* Handles [BotOfflineEvent]. It launches recovery jobs when receiving offline events from server.
2727
*/
2828
internal interface BotOfflineEventMonitor {
2929
companion object : ComponentKey<BotOfflineEventMonitor>
@@ -70,10 +70,20 @@ internal class BotOfflineEventMonitorImpl : BotOfflineEventMonitor {
7070
closeNetwork()
7171
}
7272
is BotOfflineEvent.Force -> {
73-
bot.logger.warning { "Connection occupied by another android device: ${event.message}" }
73+
bot.logger.warning { "Connection occupied by another android device. Will try to resume connection. (${event.message})" }
7474
closeNetwork()
7575
}
76-
is BotOfflineEvent.MsfOffline,
76+
is BotOfflineEvent.MsfOffline -> {
77+
// This normally means bot is blocked and requires manual action.
78+
bot.logger.warning { "Server notifies offline. (${event.cause?.message ?: event.toString()})" }
79+
closeNetwork()
80+
// `closeNetwork` will close NetworkHandler,
81+
// after which NetworkHandlerSelector will create a new instance to try to fix the problem.
82+
// A new login attempt will fail because the bot is blocked, with LoginFailedException which then wrapped into NetworkException. (LoginFailedExceptionAsNetworkException)
83+
// Selector will handle this exception, and close the block while logging this down.
84+
85+
// See SelectorNetworkHandler.instance for more information on how the Selector handles the exception.
86+
}
7787
is BotOfflineEvent.Dropped,
7888
is BotOfflineEvent.RequireReconnect,
7989
-> {

mirai-core/src/commonMain/kotlin/network/components/SsoProcessor.kt

+12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import net.mamoe.mirai.internal.network.QQAndroidClient
1717
import net.mamoe.mirai.internal.network.WLoginSigInfo
1818
import net.mamoe.mirai.internal.network.component.ComponentKey
1919
import net.mamoe.mirai.internal.network.handler.NetworkHandler
20+
import net.mamoe.mirai.internal.network.handler.selector.NetworkException
2021
import net.mamoe.mirai.internal.network.protocol.packet.OutgoingPacketWithRespType
2122
import net.mamoe.mirai.internal.network.protocol.packet.login.StatSvc
2223
import net.mamoe.mirai.internal.network.protocol.packet.login.WtLogin.Login.LoginPacketResponse
@@ -57,6 +58,17 @@ internal interface SsoProcessor {
5758
companion object : ComponentKey<SsoProcessor>
5859
}
5960

61+
/**
62+
* Wraps [LoginFailedException] into [NetworkException]
63+
*/
64+
internal class LoginFailedExceptionAsNetworkException(
65+
private val underlying: LoginFailedException
66+
) : NetworkException(underlying.message ?: "Login failed", underlying, !underlying.killBot) {
67+
override fun unwrapForPublicApi(): Throwable {
68+
return underlying
69+
}
70+
}
71+
6072
internal enum class FirstLoginResult(
6173
val success: Boolean,
6274
val canRecoverOnFirstLogin: Boolean,

mirai-core/src/commonMain/kotlin/network/handler/CommonNetworkHandler.kt

+24-13
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ import kotlinx.coroutines.*
1414
import kotlinx.coroutines.channels.Channel
1515
import kotlinx.coroutines.channels.onFailure
1616
import net.mamoe.mirai.internal.network.components.*
17+
import net.mamoe.mirai.internal.network.handler.NetworkHandler.Companion.runUnwrapCancellationException
1718
import net.mamoe.mirai.internal.network.handler.selector.NetworkException
1819
import net.mamoe.mirai.internal.network.handler.selector.NetworkHandlerSelector
1920
import net.mamoe.mirai.internal.network.handler.state.StateObserver
2021
import net.mamoe.mirai.internal.network.impl.HeartbeatFailedException
2122
import net.mamoe.mirai.internal.network.protocol.packet.OutgoingPacket
23+
import net.mamoe.mirai.network.LoginFailedException
2224
import net.mamoe.mirai.utils.*
2325
import kotlin.coroutines.CoroutineContext
2426

@@ -171,14 +173,14 @@ internal abstract class CommonNetworkHandler<Conn>(
171173
///////////////////////////////////////////////////////////////////////////
172174

173175
override fun close(cause: Throwable?) {
174-
super.close(cause) // cancel coroutine scope
175176
if (state == NetworkHandler.State.CLOSED) return // quick check if already closed
176177
if (setState { StateClosed(cause) } == null) return // atomic check
178+
super.close(cause) // cancel coroutine scope
177179
}
178180

179181
init {
180182
coroutineContext.job.invokeOnCompletion { e ->
181-
close(e?.unwrapCancellationException())
183+
close(e)
182184
}
183185
}
184186

@@ -231,7 +233,6 @@ internal abstract class CommonNetworkHandler<Conn>(
231233
) : CommonState(NetworkHandler.State.CONNECTING) {
232234
private lateinit var connection: Deferred<Conn>
233235

234-
@Suppress("JoinDeclarationAndAssignment")
235236
private lateinit var connectResult: Deferred<Unit>
236237

237238
override fun startState() {
@@ -241,18 +242,27 @@ internal abstract class CommonNetworkHandler<Conn>(
241242

242243
connectResult = async {
243244
connection.join()
244-
context[SsoProcessor].login(this@CommonNetworkHandler)
245+
try {
246+
context[SsoProcessor].login(this@CommonNetworkHandler)
247+
} catch (e: LoginFailedException) {
248+
throw LoginFailedExceptionAsNetworkException(e)
249+
}
245250
}
246251
connectResult.invokeOnCompletion { error ->
247252
if (error == null) {
248-
this@CommonNetworkHandler.launch { resumeConnection() }
253+
this@CommonNetworkHandler.launch { resumeConnection() } // go to next state.
249254
} else {
250255
// failed in SSO stage
251256
context[SsoProcessor].firstLoginResult.compareAndSet(null, FirstLoginResult.OTHER_FAILURE)
252257

253-
if (error is StateSwitchingException && error.new is CommonNetworkHandler<*>.StateConnecting) {
254-
return@invokeOnCompletion // state already switched, so do not do it again.
258+
if (error is CancellationException) {
259+
// CancellationException is either caused by parent cancellation or manual `connectResult.cancel`.
260+
// The later should not happen, so it's definitely due to the parent cancellation.
261+
// It means that the super scope, the NetworkHandler is closed.
262+
// If we don't `return` here, state will be set to StateClosed with CancellationException, which isn't the real cause.
263+
return@invokeOnCompletion
255264
}
265+
256266
setState {
257267
// logon failure closes the network handler.
258268
StateClosed(collectiveExceptions.collectGet(error))
@@ -304,25 +314,21 @@ internal abstract class CommonNetworkHandler<Conn>(
304314
return true
305315
}
306316

307-
private val configPush = this@CommonNetworkHandler.launch(CoroutineName("ConfigPush sync")) {
308-
context[ConfigPushProcessor].syncConfigPush(this@CommonNetworkHandler)
309-
}
317+
// Yes, nothing to do in this state.
310318

311319
override suspend fun resumeConnection0(): Unit = runUnwrapCancellationException {
312320
(coroutineContext.job as CompletableJob).run {
313321
complete()
314322
join()
315323
}
316-
joinCompleted(configPush) // throw exception
317-
setState { StateOK(connection, configPush) }
324+
setState { StateOK(connection) }
318325
} // noop
319326

320327
override fun toString(): String = "StateLoading"
321328
}
322329

323330
protected inner class StateOK(
324331
private val connection: Conn,
325-
private val configPush: Job,
326332
) : CommonState(NetworkHandler.State.OK) {
327333
override fun startState() {
328334
coroutineContext.job.invokeOnCompletion { err ->
@@ -346,6 +352,11 @@ internal abstract class CommonNetworkHandler<Conn>(
346352
context[KeyRefreshProcessor].keyRefreshLoop(this@CommonNetworkHandler)
347353
}
348354

355+
private val configPush = this@CommonNetworkHandler.launch(CoroutineName("ConfigPush sync")) {
356+
context[ConfigPushProcessor].syncConfigPush(this@CommonNetworkHandler)
357+
}
358+
359+
349360
override suspend fun sendPacketImpl(packet: OutgoingPacket): Boolean {
350361
connection.writeAndFlushOrCloseAsync(packet)
351362
return true

0 commit comments

Comments
 (0)