lib/elastic_graph/graphql/http_endpoint.rb



# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/graphql/client"
require "elastic_graph/support/memoizable_data"
require "json"
require "uri"

module ElasticGraph
  class GraphQL
    # Handles HTTP concerns for when ElasticGraph is served via HTTP. The logic here
    # is based on the graphql.org recommendations:
    #
    # https://graphql.org/learn/serving-over-http/#http-methods-headers-and-body
    #
    # As that recommends, we support queries in 3 different HTTP forms:
    #
    # - A standard POST request as application/json with query/operationName/variables in the body.
    # - A GET request with `query`, `operationName` and `variables` query params in the URL.
    # - A POST as application/graphql with a query string in the body.
    #
    # Note that this is designed to be agnostic to what the calling HTTP context is (for example,
    # AWS Lambda, or Rails, or Rack...). Instead, this uses simple Request/Response value objects
    # that the calling context can easily translate to/from to use this in any HTTP context.
    class HTTPEndpoint
      APPLICATION_JSON = "application/json"
      APPLICATION_GRAPHQL = "application/graphql"

      # Builds an endpoint from its collaborators:
      #
      # - query_executor: must respond to `execute` (see `process` for the arguments it is given).
      # - monotonic_clock: must respond to `now_in_ms`; supplies the default `start_time_in_ms`.
      # - client_resolver: must respond to `resolve(request)`, returning either a client or an
      #   `HTTPResponse`. An `HTTPResponse` is returned to the caller as-is, without running a query.
      def initialize(query_executor:, monotonic_clock:, client_resolver:)
        @query_executor = query_executor
        @monotonic_clock = monotonic_clock
        @client_resolver = client_resolver
      end

      # Processes the given HTTP request, returning an HTTP response.
      #
      # `max_timeout_in_ms` is not a property of the HTTP request (the
      # calling application will determine it instead!) so it is a separate argument.
      #
      # Responses produced here:
      #
      # - whatever `HTTPResponse` the client resolver returns, if it returns one;
      # - 400/405/415 when the request cannot be parsed (see the `with_*` helpers below);
      # - 504 when the query executor raises `Errors::RequestExceededDeadlineError`;
      # - 200 with the JSON-encoded `to_h` of the execution result otherwise.
      #
      # Note that this method does _not_ convert exceptions to 500 responses. It's up to
      # the calling application to do that if it wants to (and to determine how much of the
      # exception to return in the HTTP response...).
      def process(request, max_timeout_in_ms: nil, start_time_in_ms: @monotonic_clock.now_in_ms)
        client_or_response = @client_resolver.resolve(request)
        return client_or_response if client_or_response.is_a?(HTTPResponse)

        with_parsed_request(request, max_timeout_in_ms: max_timeout_in_ms) do |parsed|
          result = @query_executor.execute(
            parsed.query_string,
            variables: parsed.variables,
            operation_name: parsed.operation_name,
            client: client_or_response,
            timeout_in_ms: parsed.timeout_in_ms,
            context: parsed.context,
            start_time_in_ms: start_time_in_ms
          )

          HTTPResponse.json(200, result.to_h)
        end
      rescue Errors::RequestExceededDeadlineError
        HTTPResponse.error(504, "Search exceeded requested timeout.")
      end

      private

      # Helper method that converts `HTTPRequest` to a parsed form we can work with.
      # If the request can be successfully parsed, a `ParsedRequest` will be yielded;
      # otherwise an `HTTPResponse` will be returned with an error.
      #
      # Each nested helper either yields its piece of the parsed request or returns an error
      # `HTTPResponse` without yielding, so the first failure short-circuits everything after it.
      def with_parsed_request(request, max_timeout_in_ms:)
        with_request_params(request) do |params|
          with_timeout(request, max_timeout_in_ms: max_timeout_in_ms) do |timeout_in_ms|
            with_context(request) do |context|
              yield ParsedRequest.new(
                query_string: params["query"],
                variables: params["variables"] || {},
                operation_name: params["operationName"],
                timeout_in_ms: timeout_in_ms,
                context: context
              )
            end
          end
        end
      end

      # Responsible for handling the 3 types of requests we need to handle:
      #
      # - A standard POST request as application/json with query/operationName/variables in the body.
      # - A GET request with `query`, `operationName` and `variables` query params in the URL.
      # - A POST as application/graphql with a query string in the body.
      #
      # This yields a hash containing the query/operationName/variables if successful; otherwise
      # it returns an `HTTPResponse` with an error:
      #
      # - 400 if the JSON body or the `variables` are malformed or are not JSON objects.
      # - 405 if the HTTP method is neither GET nor POST.
      # - 415 if a POST uses a content type other than the two supported ones.
      def with_request_params(request)
        http_method = request.http_method

        # Media types are case-insensitive and may carry parameters (for example,
        # `application/json; charset=utf-8`), so only the bare, lowercased media type is compared.
        media_type = request.content_type.to_s.split(";", 2).first.to_s.strip.downcase if http_method == :post

        params =
          # POST with application/json is the most common form requests take, so we have it as the first branch here.
          if http_method == :post && media_type == APPLICATION_JSON
            begin
              ::JSON.parse(request.body.to_s)
            rescue ::JSON::ParserError
              # standard:disable Lint/NoReturnInBeginEndBlocks
              return HTTPResponse.error(400, "Request body is invalid JSON.")
              # standard:enable Lint/NoReturnInBeginEndBlocks
            end

          elsif http_method == :post && media_type == APPLICATION_GRAPHQL
            {"query" => request.body}

          elsif http_method == :post
            return HTTPResponse.error(415, "`#{request.content_type}` is not a supported content type. Only `#{APPLICATION_JSON}` and `#{APPLICATION_GRAPHQL}` are supported.")

          elsif http_method == :get
            ::URI.decode_www_form(::URI.parse(request.url).query.to_s).to_h.tap do |hash|
              # Variables must come in as JSON, even if in the URL. express-graphql does it this way,
              # which is a bit of a canonical implementation, as it is referenced from graphql.org:
              # https://github.com/graphql/express-graphql/blob/v0.12.0/src/index.ts#L492-L497
              hash["variables"] &&= ::JSON.parse(hash["variables"])
            rescue ::JSON::ParserError
              return HTTPResponse.error(400, "Variables are invalid JSON.")
            end

          else
            return HTTPResponse.error(405, "GraphQL only supports GET and POST requests.")
          end

        # A JSON body can be syntactically valid without being an object (e.g. `[1, 2]`, `null`, `5`).
        # Such a body has no query/operationName/variables to extract, so reject it as a client error
        # rather than letting the hash lookups below raise (or silently find nothing).
        unless params.is_a?(::Hash)
          return HTTPResponse.error(400, "Request body must be a JSON object but was not.")
        end

        # Ignore an empty string operationName. The `respond_to?` guard keeps a non-string value
        # (such as a JSON number) from raising `NoMethodError` here.
        operation_name = params["operationName"]
        params = params.merge("operationName" => nil) if operation_name.respond_to?(:empty?) && operation_name.empty?

        if (variables = params["variables"]) && !variables.is_a?(::Hash)
          return HTTPResponse.error(400, "`variables` must be a JSON object but was not.")
        end

        yield params
      end

      # Responsible for figuring out the timeout, based on a header and a provided max.
      # If successful, yields the timeout value; otherwise will return an `HTTPResponse` with
      # an error.
      #
      # The yielded value is the smaller of `max_timeout_in_ms` and the value of the
      # `TIMEOUT_MS_HEADER` header, ignoring whichever is absent; it is `nil` when both are absent.
      # A header value that `Integer()` cannot parse produces a 400 response.
      def with_timeout(request, max_timeout_in_ms:)
        requested_timeout_in_ms =
          if (timeout_in_ms_str = request.normalized_headers[HTTPRequest.normalize_header_name(TIMEOUT_MS_HEADER)])
            begin
              Integer(timeout_in_ms_str)
            rescue ::ArgumentError
              # standard:disable Lint/NoReturnInBeginEndBlocks
              return HTTPResponse.error(400, "`#{TIMEOUT_MS_HEADER}` header value of #{timeout_in_ms_str.inspect} is invalid")
              # standard:enable Lint/NoReturnInBeginEndBlocks
            end
          end

        yield [max_timeout_in_ms, requested_timeout_in_ms].compact.min
      end

      # Responsible for determining any `context` values to pass down into the `query_executor`,
      # which in turn will make the values available to the GraphQL resolvers.
      #
      # By default, our only context value is the HTTP request. This method exists to provide an extension
      # point so that ElasticGraph extensions can add `context` values based on the `request` as desired.
      #
      # Extensions can return an `HTTPResponse` with an error if the `request` is invalid according
      # to their requirements. Otherwise, they must call `super` (to delegate to this and any other
      # extensions) with a block. In the block, they must merge in their `context` values and then `yield`.
      def with_context(request)
        yield({http_request: request})
      end

      # Immutable value object holding everything `process` needs in order to execute a query.
      ParsedRequest = Data.define(:query_string, :variables, :operation_name, :timeout_in_ms, :context)
    end

    # Represents an HTTP request, containing:
    #
    # - http_method: a symbol like :get or :post.
    # - url: a string containing the full URL.
    # - headers: a hash with string keys and values containing HTTP headers. The headers can
    #   be in any form like `Content-Type`, `content-type`, `CONTENT-TYPE`, `CONTENT_TYPE`, etc.
    # - body: a string containing the request body, if there was one.
    HTTPRequest = Support::MemoizableData.define(:http_method, :url, :headers, :body) do
      # @implements HTTPRequest

      # Converts a single header name to its canonical lookup form: underscores become dashes and
      # the whole name is uppercased (so `content_type` and `Content-Type` both become `CONTENT-TYPE`).
      def self.normalize_header_name(header)
        header.tr("_", "-").upcase
      end

      # The value of the `Content-Type` header, however the caller happened to spell the header
      # name; `nil` when the request has no such header.
      def content_type
        normalized_headers["CONTENT-TYPE"]
      end

      # HTTP header names are meant to be case-insensitive, and each Web framework hands them to us
      # in its own form. Rack uppercases them and uses `_` in place of `-`. AWS Lambda proxy
      # integrations with API gateway HTTP APIs lowercase the header names:
      # https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-develop-integrations-lambda.html
      #
      # ...whereas integrations with API gateway REST APIs provide the header names as-is:
      # https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format
      #
      # To be maximally compatible, this returns the headers re-keyed by their normalized names
      # (see `normalize_header_name`). The result is memoized.
      def normalized_headers
        @normalized_headers ||= headers.transform_keys { |name| HTTPRequest.normalize_header_name(name) }
      end
    end

    # Represents an HTTP response, containing:
    #
    # - status_code: an integer like 200.
    # - headers: a hash with string keys and values containing HTTP response headers.
    # - body: a string containing the response body.
    HTTPResponse = Data.define(:status_code, :headers, :body) do
      # @implements HTTPResponse

      # Builds a response whose body is `body` serialized as JSON, with a matching
      # `Content-Type` header.
      def self.json(status_code, body)
        new(
          status_code: status_code,
          headers: {"Content-Type" => HTTPEndpoint::APPLICATION_JSON},
          body: ::JSON.generate(body)
        )
      end

      # Builds a JSON response that reports a single error message under the `errors` key.
      def self.error(status_code, message)
        error_payload = {"errors" => [{"message" => message}]}
        json(status_code, error_payload)
      end
    end

    # Steep weirdly expects them here...
    # @dynamic initialize, config, logger, runtime_metadata, graphql_schema_string, datastore_core, clock
    # @dynamic graphql_http_endpoint, graphql_query_executor, schema, datastore_search_router, filter_interpreter, filter_node_interpreter
    # @dynamic datastore_query_builder, graphql_gem_plugins, graphql_resolvers, datastore_query_adapters, monotonic_clock
    # @dynamic load_dependencies_eagerly, self.from_parsed_yaml, filter_args_translator, sub_aggregation_grouping_adapter
  end
end