【问题标题】:Hive JSON SerDe -- ClassCastException: java.lang.Integer cannot be cast to java.lang.Double(Hive JSON SerDe -- ClassCastException:java.lang.Integer 无法转换为 java.lang.Double)
【发布时间】:2014-01-13 22:43:36
【问题描述】:

我正在尝试使用 Hive JSON SerDe 将 Twitter JSON 放入 Hive 表中。我首先将 JSON 导入到由 ROW FORMAT SERDE 定义的一个表中,然后将其导入到另一个存储为 RCFile 的表中。它可以工作到一定程度,但后来我得到了以下性质的 ClassCastException:

java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
    at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667)
    at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
    at org.apache.hadoop

这是我用来定义 SerDe 表的架构:

-- Raw Twitter status JSON exposed as a Hive external table through the JSON SerDe.
--
-- Twitter IDs are 64-bit values, so every numeric `id` field is declared bigint;
-- a 32-bit int cannot hold them.
-- `user` is backtick-quoted because USER is a reserved keyword in Hive 1.2.0+.
-- NOTE(review): the array<double> coordinate fields may receive integer JSON
-- literals (e.g. [-73, 40]); the SerDe jar in use must coerce those to double,
-- otherwise reads can fail with a ClassCastException -- confirm the jar version.
CREATE EXTERNAL TABLE gh_raw (
   coordinates struct <
      coordinates: array <double>,
      type: string>,
   created_at string,
   entities struct <
      hashtags: array <struct <text: string>>,
      media: array <struct <
            display_url: string,
            expanded_url: string,
            media_url: string,
            media_url_https: string,
            sizes: struct <
               large: struct <
                  h: int,
                  resize: string,
                  w: int>,
               medium: struct <
                  h: int,
                  resize: string,
                  w: int>,
               small: struct <
                  h: int,
                  resize: string,
                  w: int>,
               thumb: struct <
                  h: int,
                  resize: string,
                  w: int>>,
            type: string,
            url: string>>,
      urls: array <struct <
            display_url: string,
            expanded_url: string,
            url: string>>,
      user_mentions: array <struct <
            id: bigint,
            name: string,
            screen_name: string>>>,
   geo struct <
      coordinates: array <double>,
      type: string>,
   id_str string,
   in_reply_to_screen_name string,
   in_reply_to_status_id_str string,
   in_reply_to_user_id_str string,
   place struct <
      attributes: struct <
         locality: string,
         region: string,
         street_address: string>,
      bounding_box: struct <
         coordinates: array <array <array <double>>>,
         type: string>,
      country: string,
      country_code: string,
      full_name: string,
      name: string,
      place_type: string,
      url: string>,
   possibly_sensitive boolean,
   retweeted_status struct <
      coordinates: struct <
         coordinates: array <double>,
         type: string>,
      created_at: string,
      entities: struct <
         hashtags: array <struct <
               text: string>>,
         media: array <struct <
               display_url: string,
               expanded_url: string,
               media_url: string,
               media_url_https: string,
               sizes: struct <
                  large: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  medium: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  small: struct <
                     h: int,
                     resize: string,
                     w: int>,
                  thumb: struct <
                     h: int,
                     resize: string,
                     w: int>>,
               type: string,
               url: string>>,
         urls: array <struct <
               display_url: string,
               expanded_url: string,
               url: string>>,
         user_mentions: array <struct <
               id: bigint,
               name: string,
               screen_name: string>>>,
      favorited: boolean,
      geo: struct <
         coordinates: array <double>,
         type: string>,
      id_str: string,
      in_reply_to_screen_name: string,
      in_reply_to_status_id_str: string,
      in_reply_to_user_id_str: string,
      place: struct <
         attributes: struct <
            locality: string,
            region: string,
            street_address: string>,
         bounding_box: struct <
            coordinates: array <array <array <double>>>,
            type: string>,
         country: string,
         country_code: string,
         full_name: string,
         name: string,
         place_type: string,
         url: string>,
      possibly_sensitive: boolean,
      scopes: struct <
         followers: boolean>,
      source: string,
      text: string,
      truncated: boolean,
      `user`: struct <
         contributors_enabled: boolean,
         created_at: string,
         default_profile: boolean,
         default_profile_image: boolean,
         description: string,
         favourites_count: int,
         followers_count: int,
         friends_count: int,
         geo_enabled: boolean,
         id: bigint,
         id_str: string,
         is_translator: boolean,
         lang: string,
         listed_count: int,
         `location`: string,
         name: string,
         profile_background_color: string,
         profile_background_image_url: string,
         profile_background_image_url_https: string,
         profile_background_tile: boolean,
         profile_banner_url: string,
         profile_image_url: string,
         profile_image_url_https: string,
         profile_link_color: string,
         profile_sidebar_border_color: string,
         profile_sidebar_fill_color: string,
         profile_text_color: string,
         profile_use_background_image: boolean,
         protected: boolean,
         screen_name: string,
         statuses_count: int,
         time_zone: string,
         url: string,
         utc_offset: int,
         verified: boolean>>,
   source string,
   text string,
   truncated boolean,
   `user` struct <
      contributors_enabled: boolean,
      created_at: string,
      default_profile: boolean,
      default_profile_image: boolean,
      description: string,
      favourites_count: int,
      followers_count: int,
      friends_count: int,
      geo_enabled: boolean,
      id: bigint,
      id_str: string,
      is_translator: boolean,
      lang: string,
      listed_count: int,
      `location`: string,
      name: string,
      profile_background_color: string,
      profile_background_image_url: string,
      profile_background_image_url_https: string,
      profile_background_tile: boolean,
      profile_banner_url: string,
      profile_image_url: string,
      profile_image_url_https: string,
      profile_link_color: string,
      profile_sidebar_border_color: string,
      profile_sidebar_fill_color: string,
      profile_text_color: string,
      profile_use_background_image: boolean,
      protected: boolean,
      screen_name: string,
      statuses_count: int,
      time_zone: string,
      url: string,
      utc_offset: int,
      verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';

我认为这在查找一组坐标或边界框时会崩溃。

我认为这是我正在使用的 JSON SerDe 的一个错误,但我不确定。我用的这个版本是我自己从源码编译的,据说其作者已经修复了这个问题,但实际上问题依旧:https://github.com/brndnmtthws/Hive-JSON-Serde

【问题讨论】:

    标签: java json hadoop hive cloudera


    【解决方案1】:

    试试这个 SerDe - https://github.com/rcongiu/Hive-JSON-Serde。我在尝试从推文中读取坐标时遇到了同样的异常,改用这个 SerDe 之后问题就解决了!

    二进制文件在这里可用,因此您无需构建它 - http://www.congiu.net/hive-json-serde/

    【讨论】:

      【解决方案2】:

      尝试使用 bigint 而不是 int。它对我有用。

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 2014-04-24
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2015-12-01
        • 2015-12-02
        • 1970-01-01
        相关资源
        最近更新 更多