【问题标题】:Open CSV Performance to write data(OpenCSV 写入数据的性能)
【发布时间】:2020-03-20 20:30:30
【问题描述】:

我偶然看到一个链接:https://github.com/hyee/OpenCSV,由于 setAsyncMode 和 RESULT_FETCH_SIZE,它大大缩短了将 JDBC ResultSet 写入 CSV 的时间。

/**
 * Extracts a JDBC ResultSet to a CSV file; the writer auto-compresses when
 * the file name ends in ".zip" or ".gz".
 *
 * NOTE(review): this snippet targets the hyee/OpenCSV fork — the static
 * RESULT_FETCH_SIZE / MAX_FETCH_ROWS fields and setAsyncMode do not exist
 * in stock opencsv, which is why it fails to compile there.
 *
 * @param rs       the result set to export; consumed by this call
 * @param fileName target CSV file path
 * @param header   NOTE(review): unused in this body — presumably consumed by
 *                 another overload; verify before relying on it
 * @param aync     when true, rows are written asynchronously (typo in the
 *                 original parameter name kept for interface compatibility)
 * @return number of data records written (writeAll's line count minus the header line)
 * @throws Exception propagated from JDBC access or file I/O
 */
public int ResultSet2CSV(final ResultSet rs, final String fileName, final String header, final boolean aync) throws Exception {
    try (CSVWriter writer = new CSVWriter(fileName)) {
        //Define fetch size(default as 30000 rows), higher to be faster performance but takes more memory
        ResultSetHelperService.RESULT_FETCH_SIZE=10000;
        //Define MAX extract rows, -1 means unlimited.
        ResultSetHelperService.MAX_FETCH_ROWS=20000;
        writer.setAsyncMode(aync);
        int result = writer.writeAll(rs, true);
        return result - 1;
    }
}

但问题是我不知道如何将上述合并到我的要求中。由于该链接涉及许多其他类,因此我不确定它们的作用以及是否需要它来满足我的要求。尽管如此,我还是尝试过,但每当我启用 2 条注释行代码时,它都无法编译。下面是我的代码。

任何关于我如何实现这一点的帮助将不胜感激。

package test;



import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Date;


import com.opencsv.CSVWriter;
import com.opencsv.ResultSetHelperService;

public class OpenCSVTest1
{
    // Shared JDBC handles; opened in connection()/retrieveData() and
    // closed (null-safely) in retrieveData's finally block.
    static Connection con = null;
    static Statement stmt = null;
    static ResultSet rs = null;

    /** Entry point: opens the JDBC connection, then streams the query result to CSV. */
    public static void main(String args[]) throws Exception
    {
        connection();
        retrieveData(con);
    }

    /**
     * Opens the JDBC connection using the (placeholder) driver, URL and credentials.
     *
     * @throws Exception if the driver cannot be loaded or the connection fails
     */
    private static void connection() throws Exception
    {
        try
        {
            Class.forName("<jdbcdriver>");
            con = DriverManager.getConnection("jdbc:","<username>","<pass>");
            System.out.println("Connection successful");
        }
        catch (Exception e)
        {
            System.out.println("Exception while establishing sql connection");
            throw e; // re-throw so the caller sees the original cause
        }
    }

    /**
     * Runs the SELECT and writes the whole result set to C:\Data\File1.csv
     * via the hyee/OpenCSV writeAll fast path.
     *
     * @param con an open JDBC connection; closed here when done
     * @throws Exception on SQL or file I/O failure
     */
    private static void retrieveData(Connection con) throws Exception
    {
        try
        {
            // BUG FIX: the original called con.createStatement() twice,
            // leaking the first Statement. Create it once, forward-only/read-only.
            stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            String query = "SELECT  * FROM dbo.tablename";

            rs = stmt.executeQuery(query);

            // BUG FIX: try-with-resources guarantees the writer is flushed and
            // closed even when writeAll throws; the original only closed it on
            // the happy path, leaking the file handle on errors.
            try (CSVWriter writer = new CSVWriter(new BufferedWriter(new FileWriter("C:\\Data\\File1.csv"))))
            {
                ResultSetHelperService service = new ResultSetHelperService();

                // NOTE(review): RESULT_FETCH_SIZE and setAsyncMode exist only in
                // the hyee/OpenCSV fork — enabling them against stock opencsv is
                // what caused the compile errors described in the question.
                // ResultSetHelperService.RESULT_FETCH_SIZE = 10000;
                // writer.setAsyncMode(async);

                service.setDateTimeFormat("yyyy-MM-dd HH:mm:ss.SSS");

                System.out.println("**** Started writing Data to CSV **** " +  new Date());

                writer.setResultService(service);

                int lines = writer.writeAll(rs, true, true, false);
                writer.flush();

                System.out.println("** OpenCSV -Completed writing the resultSet at " +  new Date() + " Number of lines written to the file " + lines);
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception while retrieving data" );
            e.printStackTrace();
            throw e;
        }
        finally
        {
            // Null-safe cleanup: any handle may still be null if an earlier step failed.
            if (rs != null) rs.close();
            if (stmt != null) stmt.close();
            if (con != null) con.close();
        }
    }
}

更新

我已经更新了我的代码。现在代码正在使用 writeAll 方法一次在 CSV 中写入完整的结果集,这会导致时间消耗。

现在我要做的是将结果集分批写入 CSV。结果集的第一列 (Sqno) 总是由 SELECT 查询动态生成的自增序号(值为 1,2,3…),但我不确定如何读取该列并据此把结果拆分后分批写入 CSV。也许 HashMap 会有帮助,所以我还附上了将结果集转换为 HashMap 的代码,供需要时参考。

import com.opencsv.CSVWriter;
import com.opencsv.ResultSetHelperService;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class OpenCSVTest1
{
    // JDBC fetch-size hint: rows fetched per driver round trip.
    static int fetchlimit_src  = 100;
    // Shared JDBC handles; closed (null-safely) in retrieveData's finally block.
    static Connection con =null;
    static Statement stmt = null;
    static ResultSet rs = null;
    static String filename = "C:\\Data\\filename.csv";
    // Kept as a field so writetoCSV's finally block can always reach it.
    static CSVWriter writer;

    /** Entry point: connect, query, and export the result set to CSV. */
    public static void main(String args[])
    {
        try
        {
            connection();
            retrieveData(con);
        }
        catch(Exception e)
        {
            System.out.println(e);
        }
    }

    /**
     * Opens the JDBC connection using the (placeholder) driver, URL and credentials.
     *
     * @throws Exception if the driver cannot be loaded or the connection fails
     */
    private static void connection() throws Exception
    {
        try
        {
            Class.forName("<jdbcdriver>");
            con = DriverManager.getConnection("jdbc:","<username>","<pass>");
            System.out.println("Connection successful");
        }
        catch (Exception e)
        {
            System.out.println("Exception while establishing sql connection");
            throw e; // re-throw so the caller sees the original cause
        }
    }

    /**
     * Executes the SELECT (with a generated Sqno row-number column) and hands
     * the forward-only result set to writetoCSV.
     *
     * @param con an open JDBC connection; closed in the finally block
     * @throws Exception on SQL or file I/O failure
     */
    private static void retrieveData(Connection con) throws Exception
    {
        try
        {
            // BUG FIX: the original created a plain Statement and then
            // immediately replaced it with a second one, leaking the first.
            stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            stmt.setFetchSize(fetchlimit_src);
            String query = "SELECT ROWNUM AS Sqno, * FROM dbo.tablename ";   // Oracle
            //  String query = "SELECT ROW_NUMBER() OVER(ORDER BY Id ASC) AS Sqno, *  FROM dbo.tablename ";  // SQLServer
            System.out.println(query);
            System.out.println("**** Started querying src **** " +  new Date());
            rs=stmt.executeQuery(query);
            System.out.println("**** Completing querying src **** " +  new Date());
            //  resultset_List(rs);   // If required store resultset(rs) to HashMap
            writetoCSV(rs,filename);
            /** How to write resultset to CSV in batches instead of writing all at once to speed up write performance ? 
             * Hint: resultset first column is Autoincrement [Sqno] (1,2,3...) which might help to split result in batches.
             *
             **/
        }
        catch (Exception e)
        {
            System.out.println("Exception while retrieving data" );
            e.printStackTrace();
            throw e;
        }
        finally 
        {
            // Null-safe cleanup: any handle may still be null if an earlier step failed.
            if (rs != null) rs.close();
            if (stmt != null) stmt.close();
            if (con != null) con.close();
        }
    }

    /**
     * Copies the remaining rows of {@code rs} into a list of column-name ->
     * value maps.
     *
     * NOTE(review): this materializes the ENTIRE result set in memory and
     * consumes rs (a forward-only cursor cannot be re-read afterwards); a
     * HashMap also loses column order and collapses duplicate column labels.
     *
     * @param rs an open result set, positioned before its first row
     * @return one map per row, keyed by column name
     * @throws SQLException on cursor or metadata access failure
     */
    private static List<Map<String, Object>> resultset_List(ResultSet rs) throws SQLException
    {
        ResultSetMetaData md = rs.getMetaData();
        int columns = md.getColumnCount();
        List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
        while (rs.next())
        {
            Map<String, Object> row = new HashMap<String, Object>(columns);
            for(int i = 1; i <= columns; ++i)
            {
                row.put(md.getColumnName(i), rs.getObject(i));
            }
            rows.add(row);
        }
        //    System.out.println(rows.toString());
        return rows;
    }

    /**
     * Streams the whole result set to {@code filename} via the hyee/OpenCSV
     * writeAll fast path, formatting timestamps as yyyy-MM-dd HH:mm:ss.SSS.
     *
     * @param rs       an open result set; fully consumed by this call
     * @param filename target CSV file path
     * @throws Exception on SQL or file I/O failure
     */
    private static void writetoCSV(ResultSet rs, String filename) throws Exception
    {
        try
        {
            writer = new CSVWriter(new BufferedWriter(new FileWriter(filename)));
            ResultSetHelperService service = new ResultSetHelperService();
            service.setDateTimeFormat("yyyy-MM-dd HH:mm:ss.SSS");
            // NOTE: the original declared unused batching locals here
            // (batchlimit, Sqno, columnname) — removed; the batching question
            // itself is still open, see the comment in retrieveData.
            System.out.println("**** Started writing Data to CSV **** " +  new Date());
            writer.setResultService(service);
            int lines = writer.writeAll(rs, true, true, false); 
            System.out.println("** OpenCSV -Completed writing the resultSet at " +  new Date() + " Number of lines written to the file " + lines);
        }
        catch (Exception e)
        {
            System.out.println("Exception while writing data" );
            e.printStackTrace();
            throw e;
        }
        finally
        {
            // BUG FIX: the original called writer.flush() unconditionally and
            // would NPE here if the CSVWriter constructor had thrown.
            if (writer != null)
            {
                writer.flush();
                writer.close();
            }
        }
    }
}

【问题讨论】:

    标签: java jdbc


    【解决方案1】:

    您应该能够使用 OpenCSV 示例,与文档中提供的内容几乎完全相同。因此,您应该不需要编写任何自己的批处理逻辑。

    我能够在大约 10 秒内将 600 万条记录结果集写入 CSV 文件。需要明确的是 - 这只是文件写入时间,而不是数据库数据获取时间 - 但我认为这应该足够快以满足您的需求。

    这是您的代码,根据其记录的方法对使用 OpenCSV 进行了调整...但请参阅我笔记末尾的警告!

    import com.opencsv.CSVWriter;
    import com.opencsv.ResultSetHelperService;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;
    import java.util.Date;
    import java.text.SimpleDateFormat;
    
    public class OpenCSVDemo {
    
        // JDBC fetch-size hint: rows fetched per driver round trip.
        static int fetchlimit_src = 100;
        // Shared JDBC handles; closed (null-safely) in retrieveData's finally block.
        static Connection con = null;
        static Statement stmt = null;
        static ResultSet rs = null;
        static String filename = "C:\\Data\\filename.csv";
    
        /** Entry point: connect, query, and export the result set to CSV. */
        public static void main(String args[]) {
            try {
                connection();
                retrieveData(con);
    
            } catch (Exception e) {
                System.out.println(e);
            }
        }
    
        /**
         * Opens the JDBC connection; fill in the driver/URL/credentials constants.
         *
         * @throws Exception if the driver cannot be loaded or the connection fails
         */
        private static void connection() throws Exception {
            try {
                final String jdbcDriver = "YOURS GOES HERE";
                final String dbUrl = "YOURS GOES HERE";
                final String user = "YOURS GOES HERE";
                final String pass = "YOURS GOES HERE";
                Class.forName(jdbcDriver);
                con = DriverManager.getConnection(dbUrl, user, pass);
                System.out.println("Connection successful");
            } catch (Exception e) {
                System.out.println("Exception while establishing sql connection");
                throw e; // re-throw so the caller sees the original cause
            }
        }
    
        /**
         * Executes the demo SELECT and times the CSV export.
         *
         * @param con an open JDBC connection; closed in the finally block
         * @throws Exception on SQL or file I/O failure
         */
        private static void retrieveData(Connection con) throws Exception {
            try {
                // BUG FIX: the original created a plain Statement and then
                // immediately replaced it with a second one, leaking the first.
                stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
                stmt.setFetchSize(fetchlimit_src);
                String query = "select title_id, primary_title from imdb.title";
                System.out.println(query);
                System.out.println("**** Started querying src **** " + new Date());
                rs = stmt.executeQuery(query);
                System.out.println("**** Completing querying src **** " + new Date());
                //  resultset_List(rs);   // If required store resultset(rs) to HashMap
    
                System.out.println();
                String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
                System.out.println("Started writing CSV:  " + timeStamp);
                writeToCsv(rs, filename, null, Boolean.FALSE);
                timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date());
                System.out.println("Finished writing CSV: " + timeStamp);
                System.out.println();
    
            } catch (Exception e) {
                System.out.println("Exception while retrieving data");
                e.printStackTrace();
                throw e;
            } finally {
                // Null-safe cleanup: any handle may still be null if an earlier step failed.
                if (rs != null) rs.close();
                if (stmt != null) stmt.close();
                if (con != null) con.close();
            }
        }
    
        /**
         * Streams the result set to {@code fileName} using the hyee/OpenCSV
         * fork's writeAll fast path (auto-compresses for ".zip"/".gz" names).
         * NOTE: mutates the fork's GLOBAL static fetch/row limits as a side
         * effect, which affects every CSVWriter in the JVM.
         *
         * @param rs       the result set to export; consumed by this call
         * @param fileName target CSV file path
         * @param header   unused by this overload (kept for signature compatibility)
         * @param aync     when true, rows are written asynchronously
         * @return number of data records written (line count minus the header line)
         * @throws Exception propagated from JDBC access or file I/O
         */
        public static int writeToCsv(final ResultSet rs, final String fileName, 
                final String header, final boolean aync) throws Exception {
            try (CSVWriter writer = new CSVWriter(fileName)) {
                //Define fetch size(default as 30000 rows), higher to be faster performance but takes more memory
                ResultSetHelperService.RESULT_FETCH_SIZE = 1000;
                //Define MAX extract rows, -1 means unlimited.
                ResultSetHelperService.MAX_FETCH_ROWS = 2000;
                writer.setAsyncMode(aync);
                int result = writer.writeAll(rs, true);
                return result - 1;
            }
        }
    
    }
    

    注意事项:

    1) 我使用“异步”设置为 false:

    writeToCsv(rs, filename, null, Boolean.FALSE);
    

    您可能想尝试使用此设置和其他设置,看看它们是否对您有任何显着影响。

    2) 关于您的评论“链接涉及许多其他类”:OpenCSV 库的整个 JAR 文件需要包含在您的项目中,它所依赖的 disruptor 库的 JAR 也是如此:

    opencsv.jar
    disruptor-3.3.6.jar
    
    

    要获取 JAR 文件,请转到 GitHub page,单击绿色按钮,选择 zip 下载,解压缩 zip 文件,然后查看“OpenCSV-master\release”文件夹。

    以通常的方式将这两个 JAR 添加到您的项目中(取决于您构建项目的方式)。

    3) 警告:当您使用 Oracle 的 Java 8 JDK/JRE 时,此代码运行正常。如果您尝试使用 OpenJDK(例如 Java 13 或类似版本),它将无法运行。这是因为 JDK 在幕后对隐藏类(hidden classes)做了一些更改。如果有兴趣,可以在此处(here)查看更多详情。

    如果您需要使用 Java 的 OpenJDK 版本,那么使用此 CSV 库所基于的库可能会更好:see here

    【讨论】:

    • 非常感谢@andrewjames 提供了如此详细的解释。这真的很有帮助。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2017-07-28
    • 2017-12-03
    • 2016-08-10
    • 2018-04-18
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多