【发布时间】:2020-06-30 00:54:16
【问题描述】:
我编写了一个非常简单的 Java 应用程序来查询 Hadoop 表(由其他人管理),但连接不稳定。我尝试使用 BasicDataSource 来管理连接,但同样的连接问题仍然存在。这是 Hadoop 的问题,还是我可以做些什么来解决?
这是我的代码:
package com.example.demo;
import org.apache.commons.dbcp.BasicDataSource;
import org.springframework.stereotype.Service;
/**
 * Owns the Commons DBCP {@link BasicDataSource} used to reach Impala over JDBC.
 *
 * <p>The pool is configured once in the constructor. A validation query is
 * installed so that connections are probed when borrowed from the pool; without
 * one, DBCP performs no validation and the validation timeout has no effect.
 *
 * <p>NOTE(review): the class is annotated {@code @Service} and also exposes a
 * static holder singleton. {@link #getInstance()} returns an instance built with
 * {@code new}, so it is a different object (with its own pool) from any instance
 * the Spring container may create — confirm which one callers should use.
 */
@Service
public class ImpalaConnection {

    /** Fully-qualified class name of the Impala JDBC driver. */
    private static final String IMPALA_DRIVER_NAME = "com.cloudera.impala.jdbc4.Driver";

    /** Cheap statement run against a pooled connection to check it still works. */
    private static final String VALIDATION_QUERY = "SELECT 1";

    /** Upper bound, in seconds, for a single validation query. */
    private static final int VALIDATION_TIMEOUT_SECONDS = 100;

    private BasicDataSource bds = new BasicDataSource();

    /**
     * Builds the JDBC URL and applies the pool settings. No username or
     * password is configured on the pool.
     */
    public ImpalaConnection() {
        bds.setDriverClassName(IMPALA_DRIVER_NAME);

        String serverName = ""; // my impala url
        String serverPort = "21050";
        // SocketTimeout=0 is passed to the driver as-is; presumably it disables
        // the socket read timeout — confirm against the driver documentation.
        String connectionUrl = "jdbc:impala://" + serverName + ":" + serverPort + ";SocketTimeout=0;";
        bds.setUrl(connectionUrl);

        // Idle-connection bounds for the pool.
        bds.setMinIdle(5);
        bds.setMaxIdle(10);

        // Validate on borrow so a connection that the server (or a load
        // balancer in between) has already dropped is discarded instead of
        // being handed to the caller. The timeout below only applies once a
        // validation query is set.
        bds.setValidationQuery(VALIDATION_QUERY);
        bds.setTestOnBorrow(true);
        bds.setValidationQueryTimeout(VALIDATION_TIMEOUT_SECONDS);
    }

    /** Holder whose static initializer creates the shared instance on first use. */
    private static class DataSourceHolder {
        private static final ImpalaConnection INSTANCE = new ImpalaConnection();
    }

    /**
     * Returns the shared instance held by {@link DataSourceHolder}.
     *
     * @return the process-wide {@code ImpalaConnection} created by the holder
     */
    public static ImpalaConnection getInstance() {
        return DataSourceHolder.INSTANCE;
    }

    /**
     * Returns the underlying connection pool.
     *
     * @return the data source currently held by this object
     */
    public BasicDataSource getBds() {
        return bds;
    }

    /**
     * Replaces the underlying connection pool. The previous pool is not closed
     * by this method.
     *
     * @param bds the data source to use from now on
     */
    public void setBds(BasicDataSource bds) {
        this.bds = bds;
    }
}
package com.example.demo;
import org.apache.commons.dbcp.BasicDataSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import java.sql.*;
/**
 * Spring Boot entry point that runs the same Impala query several times and
 * prints the rows to standard output.
 */
@SpringBootApplication
public class DemoApplication {

    // Injected by Spring but not read by the static methods below, which obtain
    // the pool through ImpalaConnection.getInstance() instead.
    @Autowired
    ImpalaConnection im;

    /**
     * Starts the Spring application, then executes the demo query four times.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(DemoApplication.class, args);
        for (int i = 1; i <= 4; i++) {
            runQuery(i);
        }
    }

    /**
     * Prints {@code k}, runs the demo query on a pooled connection and prints
     * every row as comma-separated {@code value columnName} pairs.
     *
     * <p>The connection, statement and result set are closed when the method
     * finishes, whether it succeeds or fails, so the connection goes back to
     * the pool instead of leaking on every call. Failures are printed and
     * swallowed so that the remaining iterations in {@link #main} still run.
     *
     * @param k sequence number of this run, echoed before the query executes
     */
    public static void runQuery(int k) {
        System.out.println(k);
        String sql = "SELECT\n" +
                " msgs.messagepublishevent.convdetails.convid\n" +
                "FROM \n" +
                "dv_messagepublishevent_prq_local msgs,\n" +
                "msgs.participants as participants\n" +
                "WHERE msgs.year = 2020 \n" +
                "LIMIT 10";
        // Resources are closed in reverse order of declaration: result set,
        // statement, then the connection (which returns it to the pool).
        try (Connection con = ImpalaConnection.getInstance().getBds().getConnection();
             Statement statement = con.createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            ResultSetMetaData rsmd = resultSet.getMetaData();
            int columnsNumber = rsmd.getColumnCount();
            while (resultSet.next()) {
                for (int i = 1; i <= columnsNumber; i++) {
                    if (i > 1) {
                        System.out.print(", ");
                    }
                    String columnValue = resultSet.getString(i);
                    System.out.print(columnValue + " " + rsmd.getColumnName(i));
                }
                System.out.println();
            }
        } catch (Exception e) {
            // Deliberately broad and best-effort: report and continue.
            e.printStackTrace();
        }
    }
}
虽然我有时能够连接到 Hadoop,但有时会产生如下错误堆栈:
java.sql.SQLException: [Simba][ImpalaJDBCDriver](500151) Error setting/closing session: {0}.
at com.cloudera.hivecommon.api.HS2Client.openSession(Unknown Source)
at com.cloudera.hivecommon.api.HS2Client.<init>(Unknown Source)
at com.cloudera.hivecommon.api.HiveServer2ClientFactory.createClient(Unknown Source)
at com.cloudera.hivecommon.core.HiveJDBCCommonConnection.connect(Unknown Source)
at com.cloudera.impala.core.ImpalaJDBCConnection.connect(Unknown Source)
at com.cloudera.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
at com.cloudera.jdbc.common.AbstractDriver.connect(Unknown Source)
at org.apache.commons.dbcp.DriverConnectionFactory.createConnection(DriverConnectionFactory.java:38)
at org.apache.commons.dbcp.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:582)
at org.apache.commons.pool.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:1188)
at org.apache.commons.dbcp.PoolingDataSource.getConnection(PoolingDataSource.java:106)
at org.apache.commons.dbcp.BasicDataSource.getConnection(BasicDataSource.java:1044)
at com.example.demo.DemoApplication.runQuery(DemoApplication.java:28)
Caused by: com.cloudera.support.exceptions.GeneralException: [Simba][ImpalaJDBCDriver](500151) Error setting/closing session: {0}.
... 13 more
Caused by: org.apache.thrift.transport.TTransportException: java.net.SocketException: Connection reset
at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:129)
at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86)
at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429)
at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318)
at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219)
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:69)
at org.apache.hive.service.cli.thrift.TCLIService$Client.recv_OpenSession(TCLIService.java:159)
at com.cloudera.hivecommon.api.HS2ClientWrapper.recv_OpenSession(Unknown Source)
at org.apache.hive.service.cli.thrift.TCLIService$Client.OpenSession(TCLIService.java:146)
at com.cloudera.hivecommon.api.HS2ClientWrapper.OpenSession(Unknown Source)
at com.cloudera.hivecommon.api.HS2Client.openSession(Unknown Source)
at com.cloudera.hivecommon.api.HS2Client.<init>(Unknown Source)
at com.cloudera.hivecommon.api.HiveServer2ClientFactory.createClient(Unknown Source)
at com.cloudera.hivecommon.core.HiveJDBCCommonConnection.connect(Unknown Source)
at com.cloudera.impala.core.ImpalaJDBCConnection.connect(Unknown Source)
at com.cloudera.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source)
at com.cloudera.jdbc.common.AbstractDriver.connect(Unknown Source)
at org.apache.commons.dbcp.DriverConnectionFactory.createConnection(DriverConnectionFactory.java:38)
at org.apache.commons.dbcp.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:582)
at org.apache.commons.pool.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:1188)
at org.apache.commons.dbcp.PoolingDataSource.getConnection(PoolingDataSource.java:106)
at org.apache.commons.dbcp.BasicDataSource.getConnection(BasicDataSource.java:1044)
at com.example.demo.DemoApplication.runQuery(DemoApplication.java:28)
at com.example.demo.DemoApplication.main(DemoApplication.java:19)
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:210)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127)
... 23 more
这是我的依赖项
<dependency>
<groupId>com.cloudera.impala.jdbc</groupId>
<artifactId>ImpalaJDBC4</artifactId>
<version>2.5.36</version>
</dependency>
<dependency>
<groupId>com.cloudera.impala.jdbc</groupId>
<artifactId>TCLIServiceClient</artifactId>
<version>2.5.36</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.2.1</version>
</dependency>
【问题讨论】:
-
`String serverName = ""; // my impala url` —— 你是通过负载均衡器连接到 Impala,还是直接连接到某一个 impalad?如果是前者,负载均衡器(LB)可能会不时把请求转发到不健康或已下线的 impalad。
标签: java spring cloudera impala