【问题标题】:Snowflake Pandas connection issue(雪花 Pandas 连接问题)
【发布时间】:2020-11-02 19:50:27
【问题描述】:

我正在尝试使用 pandas 连接器将数据帧插入雪花表,但遇到了权限问题,但使用“普通”雪花连接器可以正常工作。

# Snippet quoted from inside a class method: `self.conn` is an already-opened
# snowflake.connector connection and `daily_epc_df` a pandas DataFrame —
# both are created outside this excerpt (not shown).
import snowflake.connector as snow
from snowflake.connector.pandas_tools import *

# Plain cursor INSERT into my_schema.my_table — this succeeds, proving the
# connection/role can reach the table when the name is used unquoted.
cur = self.conn.cursor()
my_schema = "my_schema"
my_table = "my_table"
cur.execute(f"""INSERT INTO {my_schema}.{my_table}(load_date, some_id)
 values (current_timestamp, 'xxx')""")
# Bulk-load the DataFrame into the same table. This is the failing call: per
# the error below, write_pandas targets '"my_schema"."my_table"' (quoted,
# case-sensitive identifiers) — the accepted fix is quote_identifiers=False.
write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema)

但我得到了

  File "/Users/abohr/virtualenv/peak38/lib/python3.8/site-packages/snowflake/connector/errors.py", line 85, in default_errorhandler
    raise error_class(
snowflake.connector.errors.ProgrammingError: 001757 (42601): SQL compilation error:
Table '"my_schema"."my_table"' does not exist

同一个连接可以成功执行 INSERT,但随后 write_pandas 在同一张表上却失败了。

我也试过

df.to_sql(..., method=pd_writer)

得到

pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting

如果我连接的是 Snowflake,错误信息为什么会提到 sqlite_master?pandas 相关功能需要不同类型的连接吗?我使用的库版本如下:

name = "snowflake-connector-python"
version = "2.3.3"
description = "Snowflake Connector for Python"
category = "main"
optional = false
python-versions = ">=3.5"


[[package]]
name = "pandas"
version = "1.0.5"
description = "Powerful data structures for data analysis, time series, and statistics"
category = "main"
optional = false
python-versions = ">=3.6.1"
on Python 3.8

【问题讨论】:

  • 关于您的 write_pandas() 错误 - 该表是否存在?提供相关的代码片段也可能会有所帮助。
  • 你需要像麦克说的那样显示代码。您确定您用于使用 Python 登录 Snowflake 的角色可以访问该表吗?关于您的 df.to_sql 问题,请在以下链接中查看我的答案,该链接解释了为什么这不起作用:stackoverflow.com/questions/64505552/…
  • @SimonD - 我仔细查看错误消息后发现了问题所在:'"my_schema"."my_table"'。我在 write_pandas 上设置了 quote_identifiers=False,之后就可以工作了。为什么默认行为会使用不兼容的语法?确定是我哪里做错了吗?

标签: snowflake-cloud-data-platform


【解决方案1】:

我找到了问题所在 - 我添加了 quote_identifiers=False 参数:write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema, quote_identifiers=False),现在它可以工作了。但默认行为竟然是坏的,这看起来很不对劲,我仍然心存怀疑。

【讨论】:

    【解决方案2】:

    嗯,我做了一个我认为比 pandas 方案更出色的东西……

    如果你同意,请告诉我...

    -- SNOWBALL: scans INFORMATION_SCHEMA of DB_NAME/SCHEMA_NAME, generates one
    -- INSERT per "stale" table (never analysed, or last analysed more than
    -- MAX_AGE_DAYS ago) that collects table- and column-level stats
    -- (row count, HLL, avg length, null counts, min/max/mode/avg for numerics,
    -- APPROX_TOP_K) into SNOWBALL_TABLE, then executes those INSERTs.
    --   db_name        database whose INFORMATION_SCHEMA is scanned
    --   schema_name    schema to restrict the scan to
    --   snowball_table stats table the generated INSERTs target
    --   max_age_days   minimum age (days) before a table is re-analysed
    --   limit          max number of tables analysed per call
    -- Returns VARIANT: on success {result, analysedTables};
    -- on failure {error, analysedTables, sql} where sql is the failing statement.
    CREATE OR REPLACE PROCEDURE DEMO_DB.PUBLIC.SNOWBALL(
        db_name STRING,
        schema_name STRING,
        snowball_table STRING,
        max_age_days FLOAT,
        limit FLOAT
      )
      RETURNS VARIANT
      LANGUAGE JAVASCRIPT
      COMMENT = 'Collects table and column stats.'
      EXECUTE AS OWNER
      AS
    $$

    var validLimit = Math.max(LIMIT, 0); // prevent SQL syntax error caused by negative numbers
    // Query that emits one row per stale table: FULL_TABLE_NAME, approximate
    // row count, and INSERT_SQL — a fully generated INSERT statement whose
    // aggregate expressions are built with LISTAGG/CONCAT from the column list.
    var sqlGenerateInserts = `
    WITH snowball_tables AS (
      SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
        FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.TABLES
      ),
    snowball_columns AS (
      SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
        FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.COLUMNS
      ),
    snowball AS (
      SELECT table_name, MAX(stats_run_date_time) AS stats_run_date_time
        FROM IDENTIFIER(?) -- <<SNOWBALL_TABLE>> table
       GROUP BY table_name
    )

    SELECT full_table_name, aprox_row_count,
        CONCAT (
          'INSERT INTO IDENTIFIER(''', ?, ''') ', -- SNOWBALL table
          '(table_name,total_rows,table_last_altered,table_created,table_bytes,col_name,',
          'col_data_type,col_hll,col_avg_length,col_null_cnt,col_min,col_max,col_top,col_mode,col_avg,stats_run_date_time)',
          'SELECT ''', full_table_name, ''' AS table_name, ',
          table_stats_sql,
          ', ARRAY_CONSTRUCT( ', col_name, ') AS col_name',
          ', ARRAY_CONSTRUCT( ', col_data_type, ') AS col_data_type',
          ', ARRAY_CONSTRUCT( ', col_hll, ') AS col_hll',
          ', ARRAY_CONSTRUCT( ', col_avg_length, ') AS col_avg_length',
          ', ARRAY_CONSTRUCT( ', col_null_cnt, ') AS col_null_cnt',
          ', ARRAY_CONSTRUCT( ', col_min, ') AS col_min',
          ', ARRAY_CONSTRUCT( ', col_max, ') AS col_max',
          ', ARRAY_CONSTRUCT( ', col_top, ') AS col_top',
          ', ARRAY_CONSTRUCT( ', col_MODE, ') AS col_MODE',
          ', ARRAY_CONSTRUCT( ', col_AVG, ') AS col_AVG',
          ', CURRENT_TIMESTAMP() AS stats_run_date_time ',
          ' FROM ', quoted_table_name
        ) AS insert_sql
    FROM (
        SELECT
            tbl.full_table_name,
            tbl.row_count AS aprox_row_count,
            CONCAT ( '"', col.table_catalog, '"."',  col.table_schema, '"."',  col.table_name, '"' ) AS quoted_table_name,
            CONCAT (
              'COUNT(1) AS total_rows,''',
              IFNULL( tbl.last_altered::VARCHAR, 'NULL'), ''' AS table_last_altered,''',
              IFNULL( tbl.created::VARCHAR, 'NULL'), ''' AS table_created,',
              IFNULL( tbl.bytes::VARCHAR, 'NULL'), ' AS table_bytes' ) AS table_stats_sql,
            LISTAGG (
              CONCAT ('''', col.full_table_name, '.', col.column_name, '''' ), ', '
              ) AS col_name,
            LISTAGG ( CONCAT('''', col.data_type, '''' ), ', ' ) AS col_data_type,
            LISTAGG ( CONCAT( ' HLL(', '"', col.column_name, '"',') ' ), ', ' ) AS col_hll,
            LISTAGG ( CONCAT( ' AVG(ZEROIFNULL(LENGTH(', '"', col.column_name, '"','))) ' ), ', ' ) AS col_avg_length,
            LISTAGG ( CONCAT( ' SUM( IFF( ', '"', col.column_name, '"',' IS NULL, 1, 0) ) ' ), ', ') AS col_null_cnt,
            LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MODE(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_MODE,
            LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MIN(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', '  ) AS col_min,
            LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MAX(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_max,
            LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' AVG(', '"', col.column_name,'"',') ' ), 'NULL' ), ', ' ) AS col_AVG,
            LISTAGG ( CONCAT ( ' APPROX_TOP_K(', '"', col.column_name, '"', ', 100, 10000)' ), ', ' ) AS col_top
        FROM snowball_tables tbl JOIN snowball_columns col ON col.full_table_name = tbl.full_table_name
        LEFT OUTER JOIN snowball sb ON sb.table_name = tbl.full_table_name
        WHERE (tbl.table_catalog, tbl.table_schema) = (?, ?)
             AND ( sb.table_name IS NULL OR sb.stats_run_date_time < TIMESTAMPADD(DAY, - FLOOR(?), CURRENT_TIMESTAMP()) )
             --AND tbl.row_count > 0 -- NB: also excludes views (table_type = 'VIEW')
        GROUP BY tbl.full_table_name, aprox_row_count, quoted_table_name, table_stats_sql, stats_run_date_time
        ORDER BY stats_run_date_time NULLS FIRST )
        LIMIT ` + validLimit;

    var tablesAnalysed = [];  // tables analysed so far; returned on success AND on error
    var currentSql;           // last SQL attempted, so the catch block can report it

    try {
      currentSql = sqlGenerateInserts;
      var generateInserts = snowflake.createStatement( {
              sqlText: currentSql,
              // NOTE(review): 8 bind values here, but the SQL above appears to
              // contain only 7 '?' placeholders (LIMIT is concatenated in via
              // validLimit, not bound) — the trailing LIMIT bind looks unused;
              // confirm against Snowflake's binding rules.
              binds: [
                  `"${DB_NAME}".information_schema.tables`,
                  `"${DB_NAME}".information_schema.columns`,
                  SNOWBALL_TABLE, SNOWBALL_TABLE,
                  DB_NAME, SCHEMA_NAME, MAX_AGE_DAYS, LIMIT
                ]
            } );

      var insertStatements = generateInserts.execute();
      // loop over generated INSERT statements and execute them
      while (insertStatements.next()) {
        var tableName = insertStatements.getColumnValue('FULL_TABLE_NAME');
        currentSql = insertStatements.getColumnValue('INSERT_SQL');
        // NOTE(review): the generated INSERT already had SNOWBALL_TABLE inlined
        // at generation time (the CONCAT '?' above), so this bind appears to
        // target a statement with no remaining placeholder — verify.
        var insertStatement = snowflake.createStatement( {
                sqlText: currentSql,
                binds: [ SNOWBALL_TABLE ]
              } );
        var insertResult = insertStatement.execute();
        tablesAnalysed.push(tableName);
      }

      return { result: "SUCCESS", analysedTables: tablesAnalysed };

    }
    catch (err)  {
      // Partial progress plus the failing SQL text, for debugging.
      return {
          error: err,
          analysedTables: tablesAnalysed,
          sql: currentSql
      };
    }

    $$;

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-11-05
      • 2020-12-18
      • 2021-07-22
      • 2022-07-27
      • 2020-02-12
      • 1970-01-01
      相关资源
      最近更新 更多