【发布时间】:2021-01-24 19:19:55
【问题描述】:
我创建了一个如下所示的 MQ 侦听器,它可以正常工作,但在几分钟或几小时后会因下面的异常而断开连接。服务器不受我控制。连接走的是专线,而不是互联网。我怀疑是网络问题。同一条专线上运行的其他服务没有问题。
// Connection factory; init() sets its queue manager name, CCDT URL and SSL peer name.
MQQueueConnectionFactory cf = new MQQueueConnectionFactory();
// Connection created from cf in init(); carries the exception listener and is started last.
QueueConnection qc;
// Queue the receiver consumes from; built in init() from cp.getMqQueue().
Queue queue;
// Session created on qc in init() (non-transacted, Session.AUTO_ACKNOWLEDGE).
QueueSession queueSession;
// Receiver on queue; init() registers this object as its message listener.
QueueReceiver qr;
/**
 * Configures the connection factory, opens a queue connection and starts
 * asynchronous delivery from the configured queue to this object.
 *
 * <p>The registered exception listener only logs the reported exception; it
 * performs no recovery of its own.
 *
 * @throws JMSException if creating the connection, queue, session or receiver,
 *     or starting the connection, fails
 * @throws IOException declared by the signature; the CCDT path-to-URL conversion
 *     is the only non-JMS step in this method
 */
public void init() throws JMSException, IOException {
    // Factory settings. NOTE(review): the queue manager name is a single space,
    // presumably so the CCDT entry decides the target; confirm.
    cf.setQueueManager(" ");
    var ccdtLocation = Path.of(getCcdt()).toUri().toURL();
    cf.setCCDTURL(ccdtLocation);
    var peerName = getSslPeerName();
    cf.setSSLPeerName(peerName);

    // Open the connection and log anything it reports asynchronously.
    qc = cf.createQueueConnection();
    qc.setExceptionListener(exception -> log.error(exception));

    // Non-transacted, auto-acknowledged session with this object as the listener.
    queue = new MQQueue(cp.getMqQueue());
    final boolean transacted = false;
    queueSession = qc.createQueueSession(transacted, Session.AUTO_ACKNOWLEDGE);
    qr = queueSession.createReceiver(queue);
    qr.setMessageListener(this);

    // Delivery begins only once the connection is started.
    qc.start();
}
异常:
com.ibm.msg.client.jms.DetailedJMSException: JMSWMQ1107: A problem with this connection has occurred.
at com.ibm.msg.client.wmq.common.internal.Reason.reasonToException(Reason.java:595) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.wmq.common.internal.Reason.createException(Reason.java:215) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.wmq.internal.WMQMessageConsumer.checkJmqiCallSuccess(WMQMessageConsumer.java:217) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.wmq.internal.WMQMessageConsumer.checkJmqiCallSuccess(WMQMessageConsumer.java:273) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.wmq.internal.WMQAsyncConsumerShadow.consumer(WMQAsyncConsumerShadow.java:686) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteProxyQueue.callConsumer(RemoteProxyQueue.java:3755) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteProxyQueue.processEvent(RemoteProxyQueue.java:4824) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteProxyQueue.driveConsumer(RemoteProxyQueue.java:4645) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteProxyQueue.deliverMsgs(RemoteProxyQueue.java:5240) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteDispatchThread.deliverMsgsReconnectable(RemoteDispatchThread.java:557) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteDispatchThread.deliverMsgs(RemoteDispatchThread.java:531) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.mq.jmqi.remote.impl.RemoteDispatchThread.run(RemoteDispatchThread.java:305) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.commonservices.workqueue.WorkQueueItem.runTask(WorkQueueItem.java:319) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.commonservices.workqueue.SimpleWorkQueueItem.runItem(SimpleWorkQueueItem.java:99) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.commonservices.workqueue.WorkQueueItem.run(WorkQueueItem.java:343) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.commonservices.workqueue.WorkQueueManager.runWorkQueueItem(WorkQueueManager.java:312) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
at com.ibm.msg.client.commonservices.j2se.workqueue.WorkQueueManagerImplementation$ThreadPoolWorker.run(WorkQueueManagerImplementation.java:1227) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
Caused by: com.ibm.mq.MQException: JMSCMQ0001: IBM MQ call failed with compcode '2' ('MQCC_FAILED') reason '2009' ('MQRC_CONNECTION_BROKEN').
at com.ibm.msg.client.wmq.common.internal.Reason.createException(Reason.java:203) ~[com.ibm.mq.allclient-9.2.0.0.jar:9.2.0.0 - p920-L200710.DE]
grep -C 0 -i -a etHeartbeatInterval mq.log
[2:30:37.720.09] 00000065 @16c85cc4 c.i.mq.exits.MQCD ----+----+----+----+---- d setHeartbeatInterval(int) setter [10(0xa)]
--
[2:30:37.721.0X] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+--- d setHeartbeatInterval(int) setter [10(0xa)]
--
[2:30:37.795.1J] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d getHeartbeatInterval() getter [10(0xa)]
[2:30:37.795.1K] 00000065 @7b481be3 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- { setHeartbeatInterval(int,boolean) [10(0xa)] [false]
[2:30:37.795.1L] 00000065 @7b481be3 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- } setHeartbeatInterval(int,boolean)
--
[2:30:37.807.0N] 00000065 @43212202 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- { getHeartbeatInterval(boolean) [true]
[2:30:37.807.0O] 00000065 @43212202 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- } getHeartbeatInterval(boolean) returns [300(0x12c)] Integer
[2:30:37.807.0P] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d getHeartbeatInterval() getter [10(0xa)]
[2:30:37.807.0Q] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d setHeartbeatInterval(int) setter [300(0x12c)]
--
[2:30:37.811.01] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d getHeartbeatInterval() getter [300(0x12c)]
--
[2:30:37.811.03] 00000065 @a5146c3 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- { setHeartbeatInterval(int,boolean) [300(0x12c)] [true]
--
[2:30:37.811.05] 00000065 @a5146c3 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- } setHeartbeatInterval(int,boolean)
--
[2:30:37.817.10] 00000067 @71ff40ae c.i.mq.exits.MQCD ----+- d getHeartbeatInterval() getter [300(0x12c)]
--
[2:30:37.826.0F] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d getHeartbeatInterval() getter [300(0x12c)]
[2:30:37.826.0G] 00000065 @38eff414 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- { setHeartbeatInterval(int,boolean) [300(0x12c)] [true]
[2:30:37.826.0H] 00000065 @38eff414 c.i.m.j.remote.rfp.RfpID ----+----+----+----+---- } setHeartbeatInterval(int,boolean)
--
[2:30:37.838.0X] 00000065 @71ff40ae c.i.mq.exits.MQCD ----+----+----+----+---- d getHeartbeatInterval() getter [300(0x12c)]
[2:30:37.838.0Y] 00000065 @1bb7bbb4 c.i.m.j.remote.impl.RemoteTCPConnection ----+----+----+----+---- { protocolSetHeartbeatInterval(int) [300(0x12c)]
[2:30:37.838.0Z] 00000065 @1bb7bbb4 c.i.m.j.remote.impl.RemoteTCPConnection ----+----+----+----+---- } protocolSetHeartbeatInterval(int)
--
[2:30:37.948.1A] 00000065 @22b76f8e c.i.mq.exits.MQCD ----+----+----+---- d setHeartbeatInterval(int) setter [10(0xa)]
--
[2:30:37.957.0C] 00000065 @716fc5e c.i.mq.exits.MQCD ----+----+----+----+--- d setHeartbeatInterval(int) setter [10(0xa)]
--
[2:30:37.958.1T] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+-- d setHeartbeatInterval(int) setter [10(0xa)]
--
[2:30:38.039.0K] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d getHeartbeatInterval() getter [10(0xa)]
[2:30:38.039.0L] 00000065 @1cf5776d c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- { setHeartbeatInterval(int,boolean) [10(0xa)] [false]
[2:30:38.039.0M] 00000065 @1cf5776d c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- } setHeartbeatInterval(int,boolean)
--
[2:30:38.052.0O] 00000065 @3ed71710 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- { getHeartbeatInterval(boolean) [true]
[2:30:38.052.0P] 00000065 @3ed71710 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- } getHeartbeatInterval(boolean) returns [300(0x12c)] Integer
[2:30:38.052.0Q] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d getHeartbeatInterval() getter [10(0xa)]
[2:30:38.052.0R] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d setHeartbeatInterval(int) setter [300(0x12c)]
--
[2:30:38.056.02] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d getHeartbeatInterval() getter [300(0x12c)]
[2:30:38.056.03] 00000065 @218f5897 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- { setHeartbeatInterval(int,boolean) [300(0x12c)] [true]
[2:30:38.056.04] 00000065 @218f5897 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- } setHeartbeatInterval(int,boolean)
--
[2:30:38.061.00] 00000069 @4c4e13bb c.i.mq.exits.MQCD ----+- d getHeartbeatInterval() getter [300(0x12c)]
--
[2:30:38.072.0I] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d getHeartbeatInterval() getter [300(0x12c)]
[2:30:38.072.0J] 00000065 @5d299189 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- { setHeartbeatInterval(int,boolean) [300(0x12c)] [true]
[2:30:38.072.0K] 00000065 @5d299189 c.i.m.j.remote.rfp.RfpID ----+----+----+----+--- } setHeartbeatInterval(int,boolean)
--
[2:30:38.085.02] 00000065 @4c4e13bb c.i.mq.exits.MQCD ----+----+----+----+--- d getHeartbeatInterval() getter [300(0x12c)]
[2:30:38.085.03] 00000065 @51b68e6a c.i.m.j.remote.impl.RemoteTCPConnection ----+----+----+----+--- { protocolSetHeartbeatInterval(int) [300(0x12c)]
--
[2:30:38.085.05] 00000065 @51b68e6a c.i.m.j.remote.impl.RemoteTCPConnection ----+----+----+----+--- } protocolSetHeartbeatInterval(int)
在 90 分钟后或大约 50 分钟后出现断开连接:
15:28 start
16:59 restart (90 min)
17:55 restart (55 min)
18:51 restart (54 min)
19:47 restart (56 min)
3:06 start
4:47 restart (90 min)
5:38 restart (50 min)
6:34 restart (56 min)
7:25 restart (50 min)
8:21 restart (55 min)
9:57 restart (95 min)
10:48 restart (50 min)
11:39 restart (51 min)
12:30 restart (51 min)
13:26 restart (56 min)
14:22 restart (56 min)
15:13 restart (51 min)
数据包捕获看起来像这样。参见#162,我发送了 TLS 数据包,然后 9 次重新传输,然后在 #162 发送后 60 秒断开连接。
192.168.77.102 = me (MQ JMS client)
192.168.77.1 = router(next hop)
160.43.166.189 = MQ server
【问题讨论】:
-
MQRC_CONNECTION_BROKEN (2009) 表示网络连接出现了问题(也就是连接"中断"),或者客户端没有及时收到来自队列管理器的心跳 (HB) 响应。您提到连接走的是专线,这表明流量可能会经过其他特殊的网络设备,例如防火墙或负载均衡器,其中任何一个都可能施加自己的会话级超时。
对于 MQ,当通道上在 HBINT 秒内没有其他活动时,客户端会向服务器发送 HB;如果 QMGR 在 HBINT+5 秒内没有收到来自客户端的 HB,它会向客户端发送一个 HB。如果 HBINT 小于 60 秒,发送 HB 的一方会等待 HBINT 秒以获得响应,然后抛出 2009;如果 HBINT 大于或等于 60 秒,发送 HB 的一方会等待 60 秒以获得响应,然后抛出 2009。HBINT 在通道启动时协商确定,取队列管理器和客户端两者取值中的较大者。
-
对于没有 CCDT 的 Java MQ 客户端,它们将始终向队列管理器提供 HBINT(1),这实际上意味着 HBINT 将始终与 Qmgr HBINT 值协商。默认情况下,MQ 通道具有 HBINT(300),这意味着 HB 仅在没有其他活动时每 5 分钟发送一次,并且它们将等待 60 秒以等待响应。如果您的负载均衡器/防火墙的超时时间少于 5 分钟,那么当您的通道未主动发送数据时,它可能会在发送 HB 之前处于空闲状态并断开连接。
-
@JoshMc,感谢您提供的有用信息!我做了数据包捕获,发现:我的客户端向 MQ 服务器发送了一个 TLS 数据包,200 毫秒后仍未收到任何回复,于是客户端开始对该 TLS 数据进行 TCP 重传。在 60 秒内重传 9 次之后,我断开并重新连接,一切又恢复正常。所以我猜这是服务器一侧的网络问题,而不是 MQ 的问题。或者即便是 MQ 的问题,也是服务器上的 MQ,而不是我的客户端。
-
SVRCONN 通道上的 HBINT 是什么?