【问题标题】:error writing with Apache Spark SQLContext使用 Apache Spark SQLContext 编写错误
【发布时间】:2017-05-03 03:54:38
【问题描述】:

我是使用 Spark SQL 的新手。我遵循了 DataBricks 的在线指南:https://docs.databricks.com/spark/latest/data-sources/sql-databases.html

我可以成功连接到 MySQL 实例并从中读取。但我不断从 Spark SQL 中得到 NoTableFound 或 NoDatabaseFound 错误的变体。这是我的整个测试类的样子:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Properties;

public class MySqlConnectionTest {
    private static final String MYSQL_USERNAME = "";
    private static final String MYSQL_PASSWORD = "";
    private static final String MYSQL_HOSTNAME = "";
    private static final String MYSQL_PORT = "";
    private static final String MYSQL_DATABASE = "";
    private static final String MYSQL_URL = "jdbc:mysql://" + MYSQL_HOSTNAME + ":" + MYSQL_PORT + "/" + MYSQL_DATABASE + "?user=" + MYSQL_USERNAME + "&password=" + MYSQL_PASSWORD;

public static void main(String[] args) {
    Properties connectionProperties = new Properties();
    connectionProperties.put("user", MYSQL_USERNAME);
    connectionProperties.put("password", MYSQL_PASSWORD);

    /* First verify we are getting a valid connection!
    try {
        testConnection();
    } catch(Exception e) {
        e.printStackTrace();
    } */

    /*
    * NONE of the writeToSummary methods work! The readFromSummary methods work fine...
    * */
//        writeToSummary(connectionProperties);
//        writeToSummaryV2(connectionProperties);
    writeToSummaryV3(connectionProperties);
}

private static void testConnection() throws ClassNotFoundException, SQLException {
    Class.forName("com.mysql.jdbc.Driver");
    Connection connection = DriverManager.getConnection(MYSQL_URL, MYSQL_USERNAME, MYSQL_PASSWORD);
    boolean result = connection.isClosed();
    System.out.println("@@ is connection closed?? ==> " + result);
}

private static SparkSession getSparkSession(){
    return SparkSession.builder().master("local[2]").appName("readUsageSummaryV2").getOrCreate();
}

private static SQLContext getSqlContext() {
    SparkConf sparkConf = new SparkConf()
            .setAppName("saveUsageSummary")
            .setMaster("local[2]");

    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
    return new SQLContext(javaSparkContext);
}

private static void readFromSummary(Properties connectionProperties) {
    Dataset dataSet = getSqlContext().read().jdbc(MYSQL_URL, "summary", connectionProperties);
    dataSet.printSchema();

    dataSet.select("id","cycle_key", "product", "access_method", "billed", "received_date")
            .limit(5)
            .show();
}

private static void readFromSummaryV2(Properties connectionProperties) {
    Dataset dataSet = getSparkSession().read().jdbc(MYSQL_URL, "summary", connectionProperties);
    dataSet.select("id","cycle_key", "product", "access_method", "billed", "received_date")
            .limit(5)
            .show();
}

private static void writeToSummary(Properties connectionProperties) {
    SQLContext sqlContext = getSqlContext();
    sqlContext.tables("usages")
            .write()
//                .mode(SaveMode.Append)
            .jdbc(MYSQL_URL, "summary", connectionProperties);
}

private static void writeToSummaryV2(Properties connectionProperties) {
    SQLContext sqlContext = getSqlContext();
    sqlContext.table("summary")
            .write()
//        .mode(SaveMode.Append)
            .jdbc(MYSQL_URL, "summary", connectionProperties);
}

private static void writeToSummaryV3(Properties connectionProperties) {
    SQLContext sqlContext = getSqlContext();
    sqlContext.sql("SELECT * FROM summary LIMIT 5")
            .write()
//        .mode(SaveMode.Append)
            .jdbc(MYSQL_URL, "summary", connectionProperties);
}

}

【问题讨论】:

    标签: java mysql apache-spark


    【解决方案1】:

    答案总是很简单...我以全新的眼光重新阅读了文档,并了解到要使其正常工作,Dataset.write() 方法必须编写已存在于 Spark SQL 上下文中的内容。所以我可以让它针对通过从数据库读取创建的数据集进行写入,如下所示:

    private static void writeToSummaryV4(Properties connectionProperties) {
        Dataset summary = getSparkSession().read().jdbc(MYSQL_URL, "summary", connectionProperties);
        summary.select("comp_code","cycle_key", "product", "access_method", "billed", "received_date")
                .limit(5)
                .show();
    
        summary.write().mode(SaveMode.Append).jdbc(MYSQL_URL, "summary", connectionProperties);
    } 
    

    【讨论】:

      【解决方案2】:

      另一种简单的方法是简单地传递一个 Spark 数据集并将其写入您想要的任何数据库,只需传递正确的数据库连接字符串,如下面的示例,它写入 MySQL 数据库。

      private static void writeToSummaryV4(Dataset summary) {
          summary.write()
                  .format("jdbc")
                  .option("url", MYSQL_URL)
                  .option("dbtable", MYSQL_DATABASE + "." + MYSQL_SUMMARY_TABLE)
                  .option("user", MYSQL_USERNAME)
                  .option("password", MYSQL_PASSWORD)
                  .mode(SaveMode.Append)
                  .save();
      } 
      

      对我来说,我需要从 Cassandra 数据库中读取一些内容,然后将其加载到 MySQL 数据库中。所以我可以像这样轻松地从 Cassandra DB 获取数据集:

      private static Dataset readFromCassandraSummary() {
          return getSparkSession().read()
                  .format("org.apache.spark.sql.cassandra")
                  .option("keyspace", "usage")
                  .option("table", "summary")
                  .load();
      }
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 2019-04-06
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2018-03-29
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多