lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/constants"
require "elastic_graph/graphql/aggregation/field_path_encoder"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class GraphQL
    module Aggregation
      # Represents a grouping of a timestamp field into a date histogram.
      # For the relevant Elasticsearch docs, see:
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-datehistogram-aggregation.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_date_histogram
      class DateHistogramGrouping < Support::MemoizableData.define(:field_path, :interval, :time_zone, :offset)
        # Metadata returned by `#inner_meta`. Frozen (including nested values) because this one
        # object is handed to every caller; an in-place mutation would otherwise leak into all
        # later uses.
        INNER_META = {
          # On a date histogram aggregation, the `key` is formatted as a number (milliseconds since epoch). We
          # need it formatted as a string, which `key_as_string` provides.
          "key_path" => ["key_as_string"].freeze,
          # Date histogram aggregations do not have any doc count error. Our resolver is generic and expects
          # there to always be a `doc_count_error_upper_bound`. So we want to tell it to merge an error of `0`
          # into each bucket.
          "merge_into_bucket" => {"doc_count_error_upper_bound" => 0}.freeze
        }.freeze

        # Maps each supported `interval` name to the datastore options that select it. The outer
        # hash and each options hash are frozen; `#composite_clause` only reads them via
        # `fetch`, `keys` and the non-mutating `merge`.
        INTERVAL_OPTIONS_BY_NAME = {
          # These intervals have only fixed intervals...
          "millisecond" => {"fixed_interval" => "1ms"},
          "second" => {"fixed_interval" => "1s"},
          # ...but the rest have calendar intervals, which we prefer.
          "minute" => {"calendar_interval" => "minute"},
          "hour" => {"calendar_interval" => "hour"},
          "day" => {"calendar_interval" => "day"},
          "week" => {"calendar_interval" => "week"},
          "month" => {"calendar_interval" => "month"},
          "quarter" => {"calendar_interval" => "quarter"},
          "year" => {"calendar_interval" => "year"}
        }.each_value(&:freeze).freeze
        private_constant :INTERVAL_OPTIONS_BY_NAME

        # Memoized result of encoding the `name_in_graphql_query` of every `field_path` part
        # with `FieldPathEncoder.encode`.
        def key
          @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
        end

        # Memoized result of joining the `name_in_index` of every `field_path` part with
        # `FieldPathEncoder.join`. Parts whose `name_in_index` is `nil` or `false` are skipped.
        def encoded_index_field_path
          @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
        end

        # Builds the `date_histogram` clause for this grouping.
        #
        # @param grouping_options [Hash] extra options merged on top of the interval options
        # @return [Hash] a hash of the form `{"date_histogram" => {...}}`
        # @raise [ArgumentError] when `interval` is not a key of `INTERVAL_OPTIONS_BY_NAME`
        def composite_clause(grouping_options: {})
          interval_options = INTERVAL_OPTIONS_BY_NAME.fetch(interval) do
            raise ArgumentError, "#{interval.inspect} is an unsupported interval. Valid values: #{INTERVAL_OPTIONS_BY_NAME.keys.inspect}."
          end

          # Merge order matters: `grouping_options` may override the interval options, but the
          # entries below always win. `compact` omits `offset`/`time_zone` when they are `nil`.
          inner_hash = interval_options.merge(grouping_options).merge({
            "field" => encoded_index_field_path,
            "format" => DATASTORE_DATE_TIME_FORMAT,
            "offset" => offset,
            "time_zone" => time_zone
          }.compact)

          {"date_histogram" => inner_hash}
        end

        # Builds the same clause as `#composite_clause`, with `min_doc_count` set to 1.
        #
        # @param query the argument is not read by this implementation
        #   (NOTE(review): presumably kept so all grouping types share one signature -- confirm)
        # @return [Hash] a hash of the form `{"date_histogram" => {...}}`
        def non_composite_clause_for(query)
          # `min_doc_count: 1` is important so we don't have excess buckets when there is a large gap
          # between document dates. For example, if you group on a field at the year truncation unit, and
          # a one-off rogue document has an incorrect timestamp for hundreds of years ago, you'll wind
          # up with a bucket for each intervening year. `min_doc_count: 1` excludes those empty buckets.
          composite_clause(grouping_options: {"min_doc_count" => 1})
        end

        # @return [Hash] the shared, frozen `INNER_META` hash; use non-mutating operations
        #   (e.g. `merge`) to derive a modified copy.
        def inner_meta
          INNER_META
        end
      end
    end
  end
end