【问题标题】:How to group documents by hour without day in elasticsearch?(如何在 Elasticsearch 中只按小时、不区分日期对文档进行分组?)
【发布时间】:2019-02-10 17:49:14
【问题描述】:

我有一个应用程序,用户每天都会被要求回答一份调查问卷,我希望得到平均作答时间。我尝试了几种请求,但无法只按小时对所有文档进行分组——结果总是按“日期 + 小时”分组。

我这样做:

{
 "aggs": {
      "byHour": {
          "date_histogram": {
              "field": "date",
              "interval": "hour",
              "format" : "H"

                }
            }
        }
    }
}

它确实按小时分桶了,但同时也按日期分桶;我希望忽略日期(day)。

 [
        {
          "key_as_string": "0",
          "key": 1533945600000,
          "doc_count": 40,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key_as_string": "1",
          "key": 1533949200000,
          "doc_count": 345,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key_as_string": "23",
          "key": 1534028400000,
          "doc_count": 15,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key_as_string": "0",
          "key": 1534032000000,
          "doc_count": 0,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key_as_string": "1",
          "key": 1534035600000,
          "doc_count": 2,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key_as_string": "2",
          "key": 1534039200000,
          "doc_count": 3,
          "group_by_state": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
 ]

该类型的映射(mapping):

{
  "myIndex": {
    "mappings": {
      "answer": {
        "properties": {
          "date": {
            "type": "date"
          },
          "lang": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "level": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "offset": {
            "type": "long"
          },
          "patientCaretrackId": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "protocolId": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "query": {
            "properties": {
              "constant_score": {
                "properties": {
                  "filter": {
                    "properties": {
                      "bool": {
                        "properties": {
                          "must": {
                            "properties": {
                              "term": {
                                "properties": {
                                  "questionId": {
                                    "type": "text",
                                    "fields": {
                                      "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256
                                      }
                                    }
                                  },
                                  "questionnaireId": {
                                    "type": "text",
                                    "fields": {
                                      "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256
                                      }
                                    }
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          },
          "questionId": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "questionnaireId": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "surgeonId": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "value": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            },
            "fielddata": true
          }
        }
      }
    }
  }
}

文档示例:

[
{
          "date": "2018-09-11T00:00:00.000Z",
          "lang": "fr",
          "level": "red",
          "offset": 21,
          "patientCaretrackId": "5b894b10a9f7afec73762113",
          "protocolId": "ptg-koos-long-v1",
          "questionnaireId": "j21",
          "surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
          "value": "permanentes",
          "questionId": "frequence-douleur-2"
},
{
          "date": "2018-09-11T00:00:00.000Z",
          "lang": "fr",
          "level": "red",
          "offset": 21,
          "patientCaretrackId": "5b894b10a9f7afec73762113",
          "protocolId": "ptg-koos-long-v1",
          "questionnaireId": "j21",
          "surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
          "value": "permanentes",
          "questionId": "frequence-douleur-2"
        }
]

仅通过 Elasticsearch 查询可以做到这一点吗?

谢谢,

【问题讨论】:

    标签: elasticsearch elasticsearch-aggregation date-histogram


    【解决方案1】:

    我找到解决办法了,谢谢:

    {
    	"size": 0,
     "aggs": {
          "byHour": {
              "date_histogram": {
                  "field": "date",
                  "interval": "hour",
                  "format" : "H",
                  "keyed": true,
                  "time_zone": "+02:00"
                    }
                }
            }
    }

    响应:

    {
        "took": 9,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 2796,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "byHour": {
                "buckets": {
                    "17": {
                        "key_as_string": "17",
                        "key": 1536159600000,
                        "doc_count": 2006
                    },
                    "18": {
                        "key_as_string": "18",
                        "key": 1536163200000,
                        "doc_count": 790
                    }
                }
            }
        }
    }

    【讨论】:

    • 试过这个,但它只在检索单日数据时有效:它会为结果集中的每个小时各生成一个桶,所以当数据超过 1 天时,就会得到重复的条目。
    • 不幸的是,这段代码会导致同一个键下的值被覆盖,而不是被累加。
    【解决方案2】:

    您可以将 terms 聚合与脚本结合使用:

    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script

    GET /_search
    {
        "aggs" : {
            "hours" : {
                "terms" : {
                    "script" : {
                        "source": "doc['date'].getHour()",
                        "lang": "painless"
                    }
                }
            }
        }
    }
    

    (只是提供一个思路,脚本本身是否正确我并不确定。)

    【讨论】:

    • 我不知道为什么,但我遇到了这个错误:变量 [date] 未定义(Variable [date] is not defined)。
    • 我尝试了文档的其他字段,但总是得到“变量 [xxx] 未定义”。我需要修改该类型的映射才能这样做吗?
    • 您是针对自己的索引运行这个搜索的吗?能否补充映射和 2 个示例文档,以便我复现问题?
    • 我是针对该类型进行搜索的:` curl -X GET "localhost:9200/orthense/answer/_search" -H 'Content-Type: application/json' -d' { "size": 0, "aggs" : { "hours " : { "terms" : { "script" : { "source": "doc['date'].getHour()", "lang": "painless" } } } } } ' `
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2019-01-23
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多