# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require "json"
require "google/cloud/errors"
require "google/cloud/bigquery/service"
require "google/cloud/bigquery/table"
require "google/cloud/bigquery/model"
require "google/cloud/bigquery/routine"
require "google/cloud/bigquery/external"
require "google/cloud/bigquery/dataset/list"
require "google/cloud/bigquery/dataset/access"
require "google/cloud/bigquery/dataset/tag"
require "google/cloud/bigquery/convert"
require "google/apis/bigquery_v2"
module Google
module Cloud
module Bigquery
##
# # Dataset
#
# Represents a Dataset. A dataset is a grouping mechanism that holds zero
# or more tables. Datasets are the lowest level unit of access control;
# you cannot control access at the table level. A dataset is contained
# within a specific project.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.create_dataset "my_dataset",
# name: "My Dataset",
# description: "This is my Dataset"
#
class Dataset
##
# @private The Connection object.
attr_accessor :service
##
# @private The Google API Client object.
attr_accessor :gapi
##
# @private A Google API Client Dataset Reference object.
attr_reader :reference
##
# @private Access Policy Version for get, update, patch, and insert API calls
attr_accessor :access_policy_version
##
# @private Create an empty Dataset object.
def initialize
@service = nil
@gapi = nil
@reference = nil
@access_policy_version = nil
end
##
# A unique ID for this dataset, without the project name.
#
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
#
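# @example A minimal sketch, assuming an existing dataset named "my_dataset":
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.dataset_id #=> "my_dataset"
#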
# @!group Attributes
#
def dataset_id
return reference.dataset_id if reference?
@gapi.dataset_reference.dataset_id
end
##
# The ID of the project containing this dataset.
#
# @return [String] The project ID.
#
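# @example A minimal sketch printing the ID of the project containing the dataset:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# puts dataset.project_id
#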
# @!group Attributes
#
def project_id
return reference.project_id if reference?
@gapi.dataset_reference.project_id
end
##
# @private
# The gapi fragment containing the Project ID and Dataset ID as a
# camel-cased hash.
def dataset_ref
dataset_ref = reference? ? reference : @gapi.dataset_reference
dataset_ref = dataset_ref.to_h if dataset_ref.respond_to? :to_h
dataset_ref
end
##
# A descriptive name for the dataset.
#
# @return [String, nil] The friendly name, or `nil` if the object is
# a reference (see {#reference?}).
#
# @!group Attributes
#
def name
return nil if reference?
@gapi.friendly_name
end
##
# Updates the descriptive name for the dataset.
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @param [String] new_name The new descriptive name for the dataset.
#
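# @example A minimal sketch, assuming an existing dataset named "my_dataset":
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.name = "My Dataset"
# dataset.name #=> "My Dataset"
#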
# @!group Attributes
#
def name= new_name
reload! unless resource_full?
@gapi.update! friendly_name: new_name
patch_gapi! :friendly_name
end
##
# The ETag hash of the dataset.
#
# @return [String, nil] The ETag hash, or `nil` if the object is a
# reference (see {#reference?}).
#
# @!group Attributes
#
def etag
return nil if reference?
ensure_full_data!
@gapi.etag
end
##
# A URL that can be used to access the dataset using the REST API.
#
# @return [String, nil] A REST URL for the resource, or `nil` if the
# object is a reference (see {#reference?}).
#
# @!group Attributes
#
def api_url
return nil if reference?
ensure_full_data!
@gapi.self_link
end
##
# A user-friendly description of the dataset.
#
# @return [String, nil] The description, or `nil` if the object is a
# reference (see {#reference?}).
#
# @!group Attributes
#
def description
return nil if reference?
ensure_full_data!
@gapi.description
end
##
# Updates the user-friendly description of the dataset.
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @param [String] new_description The new description for the dataset.
#
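# @example A minimal sketch, assuming an existing dataset named "my_dataset":
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.description = "This is my dataset"
#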
# @!group Attributes
#
def description= new_description
reload! unless resource_full?
@gapi.update! description: new_description
patch_gapi! :description
end
##
# The default lifetime of all tables in the dataset, in milliseconds.
#
# @return [Integer, nil] The default table expiration in milliseconds,
# or `nil` if not present or the object is a reference (see
# {#reference?}).
#
# @!group Attributes
#
def default_expiration
return nil if reference?
ensure_full_data!
begin
Integer @gapi.default_table_expiration_ms
rescue StandardError
nil
end
end
##
# Updates the default lifetime of all tables in the dataset, in
# milliseconds.
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @param [Integer] new_default_expiration The new default table
# expiration in milliseconds.
#
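# @example A minimal sketch setting a one-hour default table lifetime:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.default_expiration = 3_600_000 # 1 hour, in milliseconds
#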
# @!group Attributes
#
def default_expiration= new_default_expiration
reload! unless resource_full?
@gapi.update! default_table_expiration_ms: new_default_expiration
patch_gapi! :default_table_expiration_ms
end
##
# The time when this dataset was created.
#
# @return [Time, nil] The creation time, or `nil` if not present or the
# object is a reference (see {#reference?}).
#
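# @example A minimal sketch, assuming an existing dataset named "my_dataset":
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# puts dataset.created_at
#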
# @!group Attributes
#
def created_at
return nil if reference?
ensure_full_data!
Convert.millis_to_time @gapi.creation_time
end
##
# The date when this dataset or any of its tables was last modified.
#
# @return [Time, nil] The last modified time, or `nil` if not present or
# the object is a reference (see {#reference?}).
#
# @!group Attributes
#
def modified_at
return nil if reference?
ensure_full_data!
Convert.millis_to_time @gapi.last_modified_time
end
##
# The geographic location where the dataset should reside. Possible
# values include `EU` and `US`. The default value is `US`.
#
# @return [String, nil] The geographic location, or `nil` if the object
# is a reference (see {#reference?}).
#
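# @example A minimal sketch; the value depends on where the dataset was created:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.location #=> "US"
#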
# @!group Attributes
#
def location
return nil if reference?
@gapi.location
end
##
# A hash of user-provided labels associated with this dataset. Labels
# are used to organize and group datasets. See [Using
# Labels](https://cloud.google.com/bigquery/docs/labels).
#
# The returned hash is frozen and changes are not allowed. Use
# {#labels=} to replace the entire hash.
#
# @return [Hash<String, String>, nil] A hash containing key/value pairs,
# or `nil` if the object is a reference (see {#reference?}).
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# labels = dataset.labels
# labels["department"] #=> "shipping"
#
# @!group Attributes
#
def labels
return nil if reference?
m = @gapi.labels
m = m.to_h if m.respond_to? :to_h
m.dup.freeze
end
##
# Updates the hash of user-provided labels associated with this dataset.
# Labels are used to organize and group datasets. See [Using
# Labels](https://cloud.google.com/bigquery/docs/labels).
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @param [Hash<String, String>] labels A hash containing key/value
# pairs.
#
# The labels applied to a resource must meet the following requirements:
#
# * Each resource can have multiple labels, up to a maximum of 64.
# * Each label must be a key-value pair.
# * Keys have a minimum length of 1 character and a maximum length of
# 63 characters, and cannot be empty. Values can be empty, and have
# a maximum length of 63 characters.
# * Keys and values can contain only lowercase letters, numeric characters,
# underscores, and dashes. All characters must use UTF-8 encoding, and
# international characters are allowed.
# * The key portion of a label must be unique. However, you can use the
# same key with multiple resources.
# * Keys must start with a lowercase letter or international character.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.labels = { "department" => "shipping" }
#
# @!group Attributes
#
def labels= labels
reload! unless resource_full?
@gapi.labels = labels
patch_gapi! :labels
end
##
# The {EncryptionConfiguration} object that represents the default
# encryption method for all tables and models in the dataset. Once this
# property is set, all newly-created partitioned tables and models in
# the dataset will have their encryption set to this value, unless the
# table creation request (or query) overrides it.
#
# Present only if this dataset is using custom default encryption.
#
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
# Protecting Data with Cloud KMS Keys
#
# @return [EncryptionConfiguration, nil] The default encryption
# configuration.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# encrypt_config = dataset.default_encryption
#
# @!group Attributes
#
def default_encryption
return nil if reference?
ensure_full_data!
return nil if @gapi.default_encryption_configuration.nil?
EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
end
##
# Set the {EncryptionConfiguration} object that represents the default
# encryption method for all tables and models in the dataset. Once this
# property is set, all newly-created partitioned tables and models in
# the dataset will have their encryption set to this value, unless the
# table creation request (or query) overrides it.
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
# Protecting Data with Cloud KMS Keys
#
# @param [EncryptionConfiguration] value The new encryption config.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
# encrypt_config = bigquery.encryption kms_key: key_name
#
# dataset.default_encryption = encrypt_config
#
# @!group Attributes
#
def default_encryption= value
ensure_full_data!
@gapi.default_encryption_configuration = value.to_gapi
patch_gapi! :default_encryption_configuration
end
##
# Gets the Storage Billing Model for the dataset.
#
# @see https://cloud.google.com/blog/products/data-analytics/new-bigquery-billing-model-helps-reduce-physical-storage-costs
#
# @return [String, nil] The storage billing model, or `nil`. Possible
# values are `LOGICAL` and `PHYSICAL`. Returns `nil` if the object is a
# reference (see {#reference?}) or if the storage billing model is
# unspecified.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# storage_billing_model = dataset.storage_billing_model
#
# @!group Attributes
#
def storage_billing_model
return nil if reference?
ensure_full_data!
@gapi.storage_billing_model
end
##
# Sets the Storage Billing Model for the dataset.
#
# @see https://cloud.google.com/blog/products/data-analytics/new-bigquery-billing-model-helps-reduce-physical-storage-costs
#
# @param [String] value The new storage billing model. Accepted values
# are `LOGICAL` and `PHYSICAL`.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
# dataset.storage_billing_model = "LOGICAL"
#
# @!group Attributes
#
def storage_billing_model= value
ensure_full_data!
@gapi.storage_billing_model = value
patch_gapi! :storage_billing_model
end
##
# Retrieves the access rules for a Dataset. The rules can be updated
# by passing a block; see {Dataset::Access} for all the methods
# available.
#
# If the dataset is not a full resource representation (see
# {#resource_full?}), the full representation will be retrieved before
# the update to comply with ETag-based optimistic concurrency control.
#
# @see https://cloud.google.com/bigquery/access-control BigQuery Access
# Control
#
# @yield [access] a block for setting rules
# @yieldparam [Dataset::Access] access the object accepting rules
#
# @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# access = dataset.access
# access.writer_user? "reader@example.com" #=> false
#
# @example Manage the access rules by passing a block:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.access do |access|
# access.add_owner_group "owners@example.com"
# access.add_writer_user "writer@example.com"
# access.remove_writer_user "readers@example.com"
# access.add_reader_special :all
# access.add_reader_view other_dataset_view_object
# end
#
def access
ensure_full_data!
reload! unless resource_full?
access_builder = Access.from_gapi @gapi
if block_given?
yield access_builder
if access_builder.changed?
@gapi.update! access: access_builder.to_gapi
patch_gapi! :access
end
end
access_builder.freeze
end
##
# Retrieves the tags associated with this dataset. Tag keys are
# globally unique and managed via the Resource Manager API.
#
# @see https://cloud.google.com/resource-manager/docs/tags/tags-overview
# Tags overview
#
# @return [Array<Google::Cloud::Bigquery::Dataset::Tag>, nil] The tags
# associated with the dataset, or `nil` if the dataset has no tags.
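#
# @example A minimal sketch, assuming resource tags are attached to the dataset:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# tags = dataset.tags
# tags&.each do |tag|
# puts "#{tag.tag_key}: #{tag.tag_value}"
# end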
#
def tags
ensure_full_data!
return nil if @gapi.tags.nil?
@gapi.tags.map { |gapi| Tag.from_gapi gapi }
end
##
# Permanently deletes the dataset. The dataset must be empty before it
# can be deleted unless the `force` option is set to `true`.
#
# @param [Boolean] force If `true`, delete all the tables in the
# dataset. If `false` and the dataset contains tables, the request
# will fail. Default is `false`.
#
# @return [Boolean] Returns `true` if the dataset was deleted.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.delete
#
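# @example A sketch deleting a non-empty dataset and all its tables with `force`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.delete force: true
#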
# @!group Lifecycle
#
def delete force: nil
ensure_service!
service.delete_dataset dataset_id, force
# Set flag for #exists?
@exists = false
true
end
##
# Creates a new table. If you are adapting existing code that was
# written for the [REST
# API](https://cloud.google.com/bigquery/docs/reference/v2/tables#resource),
# you can pass the table's schema as a hash (see example).
#
# @param [String] table_id The ID of the table. The ID must contain only
# letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum
# length is 1,024 characters.
# @param [String] name A descriptive name for the table.
# @param [String] description A user-friendly description of the table.
# @yield [table] a block for setting the table
# @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
# to set additional properties on the table in the API request to
# create it.
#
# @return [Google::Cloud::Bigquery::Table] A new table object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table"
#
# @example You can also pass name and description options.
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table",
# name: "My Table",
# description: "A description of table."
#
# @example Or the table's schema can be configured with the block.
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table" do |t|
# t.schema.string "first_name", mode: :required
# t.schema.record "cities_lived", mode: :required do |s|
# s.string "place", mode: :required
# s.integer "number_of_years", mode: :required
# end
# end
#
# @example You can define the schema using a nested block.
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table" do |t|
# t.name = "My Table"
# t.description = "A description of my table."
# t.schema do |s|
# s.string "first_name", mode: :required
# s.record "cities_lived", mode: :repeated do |r|
# r.string "place", mode: :required
# r.integer "number_of_years", mode: :required
# end
# end
# end
#
# @example With time partitioning and clustering.
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table" do |t|
# t.schema do |schema|
# schema.timestamp "dob", mode: :required
# schema.string "first_name", mode: :required
# schema.string "last_name", mode: :required
# end
# t.time_partitioning_type = "DAY"
# t.time_partitioning_field = "dob"
# t.clustering_fields = ["last_name", "first_name"]
# end
#
# @example With range partitioning.
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.create_table "my_table" do |t|
# t.schema do |schema|
# schema.integer "my_table_id", mode: :required
# schema.string "my_table_data", mode: :required
# end
# t.range_partitioning_field = "my_table_id"
# t.range_partitioning_start = 0
# t.range_partitioning_interval = 10
# t.range_partitioning_end = 100
# end
#
# @!group Table
#
def create_table table_id, name: nil, description: nil
ensure_service!
new_tb = Google::Apis::BigqueryV2::Table.new(
table_reference: Google::Apis::BigqueryV2::TableReference.new(
project_id: project_id, dataset_id: dataset_id,
table_id: table_id
)
)
updater = Table::Updater.new(new_tb).tap do |tb|
tb.name = name unless name.nil?
tb.description = description unless description.nil?
end
yield updater if block_given?
gapi = service.insert_table dataset_id, updater.to_gapi
Table.from_gapi gapi, service
end
##
# Creates a new view, which is a virtual table defined by the given SQL query.
#
# With BigQuery's logical views, the query that defines the view is re-executed
# every time the view is queried. Queries are billed according to the total amount
# of data in all table fields referenced directly or indirectly by the
# top-level query. (See {Table#view?} and {Table#query}.)
#
# For materialized views, see {#create_materialized_view}.
#
# @see https://cloud.google.com/bigquery/docs/views Creating views
#
# @param [String] table_id The ID of the view table. The ID must contain
# only letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The
# maximum length is 1,024 characters.
# @param [String] query The query that BigQuery executes when the view
# is referenced.
# @param [String] name A descriptive name for the table.
# @param [String] description A user-friendly description of the table.
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
# [standard
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
# dialect. Optional. The default value is true.
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
# [legacy
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
# dialect. Optional. The default value is false.
# @param [Array<String>, String] udfs User-defined function resources
# used in a legacy SQL query. May be either a code resource to load from
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
# that contains code for a user-defined function (UDF). Providing an
# inline code resource is equivalent to providing a URI for a file
# containing the same code.
#
# This parameter is used for defining User Defined Function (UDF)
# resources only when using legacy SQL. Users of standard SQL should
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
# Routines API to define UDF resources.
#
# For additional information on migrating, see: [Migrating to
# standard SQL - Differences in user-defined JavaScript
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
#
# @return [Google::Cloud::Bigquery::Table] A new table object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# view = dataset.create_view "my_view",
# "SELECT name, age FROM proj.dataset.users"
#
# @example A name and description can be provided:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# view = dataset.create_view "my_view",
# "SELECT name, age FROM proj.dataset.users",
# name: "My View", description: "This is my view"
#
# @!group Table
#
def create_view table_id,
query,
name: nil,
description: nil,
standard_sql: nil,
legacy_sql: nil,
udfs: nil
use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
new_view_opts = {
table_reference: Google::Apis::BigqueryV2::TableReference.new(
project_id: project_id,
dataset_id: dataset_id,
table_id: table_id
),
friendly_name: name,
description: description,
view: Google::Apis::BigqueryV2::ViewDefinition.new(
query: query,
use_legacy_sql: use_legacy_sql,
user_defined_function_resources: udfs_gapi(udfs)
)
}.compact
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
gapi = service.insert_table dataset_id, new_view
Table.from_gapi gapi, service
end
##
# Creates a new materialized view.
#
# Materialized views are precomputed views that periodically cache results of a query for increased performance
# and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
# only delta changes from the base table to compute up-to-date results.
#
# Queries that use materialized views are generally faster and consume fewer resources than queries that
# retrieve the same data only from the base table. Materialized views can significantly boost the performance
# of workloads characterized by common, repeated queries.
#
# For logical views, see {#create_view}.
#
# @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
#
# @param [String] table_id The ID of the materialized view table. The ID must contain only letters (`[A-Za-z]`),
# numbers (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
# @param [String] query The query that BigQuery executes when the materialized view is referenced.
# @param [String] name A descriptive name for the table.
# @param [String] description A user-friendly description of the table.
# @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
# updated. Optional. The default value is true.
# @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
# will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
#
# @return [Google::Cloud::Bigquery::Table] A new table object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# materialized_view = dataset.create_materialized_view "my_materialized_view",
# "SELECT name, age FROM proj.dataset.users"
#
# @example Automatic refresh can be disabled:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# materialized_view = dataset.create_materialized_view "my_materialized_view",
# "SELECT name, age FROM proj.dataset.users",
# enable_refresh: false
#
# @!group Table
#
def create_materialized_view table_id,
query,
name: nil,
description: nil,
enable_refresh: nil,
refresh_interval_ms: nil
new_view_opts = {
table_reference: Google::Apis::BigqueryV2::TableReference.new(
project_id: project_id,
dataset_id: dataset_id,
table_id: table_id
),
friendly_name: name,
description: description,
materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
enable_refresh: enable_refresh,
query: query,
refresh_interval_ms: refresh_interval_ms
)
}.compact
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
gapi = service.insert_table dataset_id, new_view
Table.from_gapi gapi, service
end
##
# Retrieves an existing table by ID.
#
# @param [String] table_id The ID of a table.
# @param [Boolean] skip_lookup Optionally create just a local reference
# object without verifying that the resource exists on the BigQuery
# service. Calls made on this object will raise errors if the resource
# does not exist. Default is `false`. Optional.
# @param [String] view Specifies the view that determines which table information is returned.
# By default, basic table information and storage statistics (STORAGE_STATS) are returned.
# Accepted values include `:unspecified`, `:basic`, `:storage`, and
# `:full`. The default value is the `:unspecified` view type.
#
# @return [Google::Cloud::Bigquery::Table, nil] Returns `nil` if the
# table does not exist.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.table "my_table"
# puts table.name
#
# @example Avoid retrieving the table resource with `skip_lookup`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.table "my_table", skip_lookup: true
#
# @example Avoid retrieving transient stats of the table with `view`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# table = dataset.table "my_table", view: "basic"
#
# @!group Table
#
def table table_id, skip_lookup: nil, view: nil
ensure_service!
return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: view
Table.from_gapi gapi, service, metadata_view: view
rescue Google::Cloud::NotFoundError
nil
end
##
# Retrieves the list of tables belonging to the dataset.
#
# @param [String] token A previously-returned page token representing
# part of the larger set of results to view.
# @param [Integer] max Maximum number of tables to return.
#
# @return [Array<Google::Cloud::Bigquery::Table>] An array of tables
# (See {Google::Cloud::Bigquery::Table::List})
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# tables = dataset.tables
# tables.each do |table|
# puts table.name
# end
#
# @example Retrieve all tables: (See {Table::List#all})
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# tables = dataset.tables
# tables.all do |table|
# puts table.name
# end
#
# @!group Table
#
def tables token: nil, max: nil
ensure_service!
gapi = service.list_tables dataset_id, token: token, max: max
Table::List.from_gapi gapi, service, dataset_id, max
end
##
# Retrieves an existing model by ID.
#
# @param [String] model_id The ID of a model.
# @param [Boolean] skip_lookup Optionally create just a local reference
# object without verifying that the resource exists on the BigQuery
# service. Calls made on this object will raise errors if the resource
# does not exist. Default is `false`. Optional.
#
# @return [Google::Cloud::Bigquery::Model, nil] Returns `nil` if the
# model does not exist.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# model = dataset.model "my_model"
# puts model.model_id
#
# @example Avoid retrieving the model resource with `skip_lookup`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# model = dataset.model "my_model", skip_lookup: true
#
# @!group Model
#
def model model_id, skip_lookup: nil
ensure_service!
return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
gapi = service.get_model dataset_id, model_id
Model.from_gapi_json gapi, service
rescue Google::Cloud::NotFoundError
nil
end
##
# Retrieves the list of models belonging to the dataset.
#
# @param [String] token A previously-returned page token representing
# part of the larger set of results to view.
# @param [Integer] max Maximum number of models to return.
#
# @return [Array<Google::Cloud::Bigquery::Model>] An array of models
# (See {Google::Cloud::Bigquery::Model::List})
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# models = dataset.models
# models.each do |model|
# puts model.model_id
# end
#
# @example Retrieve all models: (See {Model::List#all})
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# models = dataset.models
# models.all do |model|
# puts model.model_id
# end
#
# @!group Model
#
def models token: nil, max: nil
ensure_service!
gapi = service.list_models dataset_id, token: token, max: max
Model::List.from_gapi gapi, service, dataset_id, max
end
##
# Creates a new routine. The following attributes may be set in the yielded block:
# {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
# {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
# {Routine::Updater#description=}.
#
# @param [String] routine_id The ID of the routine. The ID must contain only
# letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum length
# is 256 characters.
# @yield [routine] A block for setting properties on the routine.
# @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
# routine.
#
# @return [Google::Cloud::Bigquery::Routine] A new routine object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# routine = dataset.create_routine "my_routine" do |r|
# r.routine_type = "SCALAR_FUNCTION"
# r.language = "SQL"
# r.arguments = [
# Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
# ]
# r.body = "x * 3"
# r.description = "My routine description"
# end
#
# puts routine.routine_id
#
# @example Extended example:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
# routine = dataset.create_routine "my_routine" do |r|
# r.routine_type = "SCALAR_FUNCTION"
# r.language = :SQL
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
# r.arguments = [
# Google::Cloud::Bigquery::Argument.new(
# name: "arr",
# argument_kind: "FIXED_TYPE",
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
# type_kind: "ARRAY",
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
# type_kind: "STRUCT",
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
# fields: [
# Google::Cloud::Bigquery::StandardSql::Field.new(
# name: "name",
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
# ),
# Google::Cloud::Bigquery::StandardSql::Field.new(
# name: "val",
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
# )
# ]
# )
# )
# )
# )
# ]
# end
#
# @!group Routine
#
def create_routine routine_id
ensure_service!
new_tb = Google::Apis::BigqueryV2::Routine.new(
routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
)
)
updater = Routine::Updater.new new_tb
yield updater if block_given?
gapi = service.insert_routine dataset_id, updater.to_gapi
Routine.from_gapi gapi, service
end
##
# Retrieves an existing routine by ID.
#
# @param [String] routine_id The ID of a routine.
# @param [Boolean] skip_lookup Optionally create just a local reference
# object without verifying that the resource exists on the BigQuery
# service. Calls made on this object will raise errors if the resource
# does not exist. Default is `false`. Optional.
#
# @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
# routine does not exist.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# routine = dataset.routine "my_routine"
# puts routine.routine_id
#
# @example Avoid retrieving the routine resource with `skip_lookup`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# routine = dataset.routine "my_routine", skip_lookup: true
#
# @!group Routine
#
def routine routine_id, skip_lookup: nil
ensure_service!
return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
gapi = service.get_routine dataset_id, routine_id
Routine.from_gapi gapi, service
rescue Google::Cloud::NotFoundError
nil
end
##
# Retrieves the list of routines belonging to the dataset.
#
# @param [String] token A previously-returned page token representing
# part of the larger set of results to view.
# @param [Integer] max Maximum number of routines to return.
# @param [String] filter If set, then only the routines matching this filter are returned. The current supported
# form is `routineType:`, with a {Routine#routine_type} enum value. Example: `routineType:SCALAR_FUNCTION`.
#
# @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
# (See {Google::Cloud::Bigquery::Routine::List})
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# routines = dataset.routines
# routines.each do |routine|
# puts routine.routine_id
# end
#
# @example Retrieve all routines: (See {Routine::List#all})
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# routines = dataset.routines
# routines.all do |routine|
# puts routine.routine_id
# end
#
# @!group Routine
#
def routines token: nil, max: nil, filter: nil
ensure_service!
gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
end
##
# Queries data by creating a [query
# job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
#
# Sets the current dataset as the default dataset in the query. Useful
# for using unqualified table names.
#
# The geographic location for the job ("US", "EU", etc.) can be set via
# {QueryJob::Updater#location=} in a block passed to this method. If the
# dataset is a full resource representation (see {#resource_full?}), the
# location of the job will be automatically set to the location of the
# dataset.
#
# @param [String] query A query string, following the BigQuery [query
# syntax](https://cloud.google.com/bigquery/query-reference), of the
# query to execute. Example: "SELECT count(f1) FROM
# [myProjectId:myDatasetId.myTableId]".
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
# true.
#
# BigQuery types are converted from Ruby types as follows:
#
# | BigQuery | Ruby | Notes |
# |--------------|--------------------------------------|----------------------------------------------------|
# | `BOOL` | `true`/`false` | |
# | `INT64` | `Integer` | |
# | `FLOAT64` | `Float` | |
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
# | `STRING` | `String` | |
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
# | `DATE` | `Date` | |
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
# | `TIMESTAMP` | `Time` | |
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
#
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
# specify the SQL type for these values.
#
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
# should be BigQuery type codes from the following list:
#
# * `:BOOL`
# * `:INT64`
# * `:FLOAT64`
# * `:NUMERIC`
# * `:BIGNUMERIC`
# * `:STRING`
# * `:DATETIME`
# * `:DATE`
# * `:GEOGRAPHY`
# * `:JSON`
# * `:TIMESTAMP`
# * `:TIME`
# * `:BYTES`
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
# is specified as `[:INT64]`.
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
# match the `params` hash, and the values are the types value that matches the data.
#
# Types are optional.
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
# that represents the mapping of the external tables to the table
# names used in the SQL query. The hash keys are the table names, and
# the hash values are the external table objects. See {Dataset#query}.
# @param [String] priority Specifies a priority for the query. Possible
# values include `INTERACTIVE` and `BATCH`. The default value is
# `INTERACTIVE`.
# @param [Boolean] cache Whether to look for the result in the query
# cache. The query cache is a best-effort cache that will be flushed
# whenever tables in the query are modified. The default value is
# true. For more information, see [query
# caching](https://developers.google.com/bigquery/querying-data).
# @param [Table] table The destination table where the query results
# should be stored. If not present, a new table will be created to
# store the results.
# @param [String] create Specifies whether the job is allowed to create
# new tables. The default value is `needed`.
#
# The following values are supported:
#
# * `needed` - Create the table if it does not exist.
# * `never` - The table must already exist. A 'notFound' error is
# raised if the table does not exist.
# @param [String] write Specifies the action that occurs if the
# destination table already exists. The default value is `empty`.
#
# The following values are supported:
#
# * `truncate` - BigQuery overwrites the table data.
# * `append` - BigQuery appends the data to the table.
# * `empty` - A 'duplicate' error is returned in the job result if the
# table exists and contains data.
# @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
# Instead, if the query is valid, BigQuery returns statistics about
# the job such as how many bytes would be processed. If the query is
# invalid, an error returns. The default value is false.
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
# [standard
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
# dialect for this query. If set to true, the query will use standard
# SQL rather than the [legacy
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
# dialect. Optional. The default value is true.
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
# [legacy
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
# dialect for this query. If set to false, the query will use
# BigQuery's [standard
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
# dialect. Optional. The default value is false.
# @param [Boolean] large_results This option is specific to Legacy SQL.
# If `true`, allows the query to produce arbitrarily large result
# tables at a slight cost in performance. Requires `table` parameter
# to be set.
# @param [Boolean] flatten This option is specific to Legacy SQL.
# Flattens all nested and repeated fields in the query results. The
# default value is `true`. `large_results` parameter must be `true` if
# this is set to `false`.
# @param [Integer] maximum_billing_tier Limits the billing tier for this
# job. Queries that have resource usage beyond this tier will fail
# (without incurring a charge). WARNING: The billed byte amount can be
# multiplied by an amount up to this number! Most users should not need
# to alter this setting, and we recommend that you avoid introducing new
# uses of it. Deprecated.
# @param [Integer] maximum_bytes_billed Limits the bytes billed for this
# job. Queries that will have bytes billed beyond this limit will fail
# (without incurring a charge). Optional. If unspecified, this will be
# set to your project default.
# @param [String] job_id A user-defined ID for the query job. The ID
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
# `job_id` is provided, then `prefix` will not be used.
#
# See [Generating a job
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
# @param [String] prefix A string, usually human-readable, that will be
# prepended to a generated value to produce a unique job ID. For
# example, the prefix `daily_import_job_` can be given to generate a
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
# be used.
# @param [Hash] labels A hash of user-provided labels associated with
# the job. You can use these to organize and group your jobs.
#
# The labels applied to a resource must meet the following requirements:
#
# * Each resource can have multiple labels, up to a maximum of 64.
# * Each label must be a key-value pair.
# * Keys have a minimum length of 1 character and a maximum length of
# 63 characters, and cannot be empty. Values can be empty, and have
# a maximum length of 63 characters.
# * Keys and values can contain only lowercase letters, numeric characters,
# underscores, and dashes. All characters must use UTF-8 encoding, and
# international characters are allowed.
# * The key portion of a label must be unique. However, you can use the
# same key with multiple resources.
# * Keys must start with a lowercase letter or international character.
# @param [Array<String>, String] udfs User-defined function resources
# used in a legacy SQL query. May be either a code resource to load from
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
# that contains code for a user-defined function (UDF). Providing an
# inline code resource is equivalent to providing a URI for a file
# containing the same code.
#
# This parameter is used for defining User Defined Function (UDF)
# resources only when using legacy SQL. Users of standard SQL should
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
# Routines API to define UDF resources.
#
# For additional information on migrating, see: [Migrating to
# standard SQL - Differences in user-defined JavaScript
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
# @param [Boolean] create_session If true, creates a new session, where the
# session ID will be a server-generated random ID. If false, runs the query
# with an existing session ID when one is provided in the `session_id`
# param, otherwise runs the query in non-session mode. See {Job#session_id}.
# The default value is false.
# @param [String] session_id The ID of an existing session. See also the
# `create_session` param and {Job#session_id}.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
# configuration object for setting additional options for the query.
#
# @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
#
# @example Query using standard SQL:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "SELECT name FROM my_table"
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @example Query using legacy SQL:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "SELECT name FROM my_table",
# legacy_sql: true
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @example Query using positional query parameters:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "SELECT name FROM my_table WHERE id = ?",
# params: [1]
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @example Query using named query parameters:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "SELECT name FROM my_table WHERE id = @id",
# params: { id: 1 }
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @example Query using named query parameters with types:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
# params: { ids: [] },
# types: { ids: [:INT64] }
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @example Execute a DDL statement:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "CREATE TABLE my_table (x INT64)"
#
# job.wait_until_done!
# if !job.failed?
# table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
# end
#
# @example Execute a DML statement:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
#
# job.wait_until_done!
# if !job.failed?
# puts job.num_dml_affected_rows
# end
#
# @example Run query in a session:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
#
# job.wait_until_done!
#
# session_id = job.session_id
# data = dataset.query "SELECT * FROM temptable", session_id: session_id
#
# @example Query using external data source, set destination:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# csv_url = "gs://bucket/path/to/data.csv"
# csv_table = dataset.external csv_url do |csv|
# csv.autodetect = true
# csv.skip_leading_rows = 1
# end
#
# job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
# query.external = { my_ext_table: csv_table }
# query.table = dataset.table "my_table", skip_lookup: true
# end
#
# job.wait_until_done!
# if !job.failed?
# job.data.each do |row|
# puts row[:name]
# end
# end
#
# @!group Data
#
def query_job query,
params: nil,
types: nil,
external: nil,
priority: "INTERACTIVE",
cache: true,
table: nil,
create: nil,
write: nil,
dryrun: nil,
standard_sql: nil,
legacy_sql: nil,
large_results: nil,
flatten: nil,
maximum_billing_tier: nil,
maximum_bytes_billed: nil,
job_id: nil,
prefix: nil,
labels: nil,
udfs: nil,
create_session: nil,
session_id: nil
ensure_service!
options = {
params: params,
types: types,
external: external,
priority: priority,
cache: cache,
table: table,
create: create,
write: write,
dryrun: dryrun,
standard_sql: standard_sql,
legacy_sql: legacy_sql,
large_results: large_results,
flatten: flatten,
maximum_billing_tier: maximum_billing_tier,
maximum_bytes_billed: maximum_bytes_billed,
job_id: job_id,
prefix: prefix,
labels: labels,
udfs: udfs,
create_session: create_session,
session_id: session_id
}
updater = QueryJob::Updater.from_options service, query, options
updater.dataset = self
updater.location = location if location # may be dataset reference
yield updater if block_given?
gapi = service.query_job updater.to_gapi
Job.from_gapi gapi, service
end
##
# Queries data and waits for the results. In this method, a {QueryJob}
# is created and its results are saved to a temporary table, then read
# from the table. Timeouts and transient errors are generally handled
# as needed to complete the query. When used for executing DDL/DML
# statements, this method does not return row data.
#
# Sets the current dataset as the default dataset in the query. Useful
# for using unqualified table names.
#
# The geographic location for the job ("US", "EU", etc.) can be set via
# {QueryJob::Updater#location=} in a block passed to this method. If the
# dataset is a full resource representation (see {#resource_full?}), the
# location of the job will be automatically set to the location of the
# dataset.
#
# @see https://cloud.google.com/bigquery/querying-data Querying Data
#
# @param [String] query A query string, following the BigQuery [query
# syntax](https://cloud.google.com/bigquery/query-reference), of the
# query to execute. Example: "SELECT count(f1) FROM
# [myProjectId:myDatasetId.myTableId]".
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
# true.
#
# BigQuery types are converted from Ruby types as follows:
#
# | BigQuery | Ruby | Notes |
# |--------------|--------------------------------------|----------------------------------------------------|
# | `BOOL` | `true`/`false` | |
# | `INT64` | `Integer` | |
# | `FLOAT64` | `Float` | |
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
# | `STRING` | `String` | |
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
# | `DATE` | `Date` | |
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
# | `TIMESTAMP` | `Time` | |
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
#
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
# specify the SQL type for these values.
#
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
# should be BigQuery type codes from the following list:
#
# * `:BOOL`
# * `:INT64`
# * `:FLOAT64`
# * `:NUMERIC`
# * `:BIGNUMERIC`
# * `:STRING`
# * `:DATETIME`
# * `:DATE`
# * `:GEOGRAPHY`
# * `:JSON`
# * `:TIMESTAMP`
# * `:TIME`
# * `:BYTES`
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
# is specified as `[:INT64]`.
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
# match the `params` hash, and the values are the types value that matches the data.
#
# Types are optional.
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
# that represents the mapping of the external tables to the table
# names used in the SQL query. The hash keys are the table names, and
# the hash values are the external table objects. See {Dataset#query}.
# @param [Integer] max The maximum number of rows of data to return per
# page of results. Setting this flag to a small value such as 1000 and
# then paging through results might improve reliability when the query
# result set is large. In addition to this limit, responses are also
# limited to 10 MB. By default, there is no maximum row count, and
# only the byte limit applies.
# @param [Boolean] cache Whether to look for the result in the query
# cache. The query cache is a best-effort cache that will be flushed
# whenever tables in the query are modified. The default value is
# true. For more information, see [query
# caching](https://developers.google.com/bigquery/querying-data).
# @param [Boolean] standard_sql Specifies whether to use BigQuery's
# [standard
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
# dialect for this query. If set to true, the query will use standard
# SQL rather than the [legacy
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
# dialect. When set to true, the values of `large_results` and
# `flatten` are ignored; the query will be run as if `large_results`
# is true and `flatten` is false. Optional. The default value is
# true.
# @param [Boolean] legacy_sql Specifies whether to use BigQuery's
# [legacy
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
# dialect for this query. If set to false, the query will use
# BigQuery's [standard
# SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
# dialect. When set to false, the values of `large_results` and `flatten` are
# ignored; the query will be run as if `large_results` is true and
# `flatten` is false. Optional. The default value is false.
# @param [String] session_id The ID of an existing session. See the
# `create_session` param in {#query_job} and {Job#session_id}.
# @param [Boolean] format_options_use_int64_timestamp Output timestamps
# as microsecond (usec) int64 values. The default is true.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
# configuration object for setting additional options for the query.
#
# @return [Google::Cloud::Bigquery::Data] A new data object.
#
# @example Query using standard SQL:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table"
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
# @example Query using legacy SQL:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table",
# legacy_sql: true
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
# @example Query using positional query parameters:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table WHERE id = ?",
# params: [1]
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
# @example Query using named query parameters:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table WHERE id = @id",
# params: { id: 1 }
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
# @example Query using named query parameters with types:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
# params: { ids: [] },
# types: { ids: [:INT64] }
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
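# @example Query using positional query parameters with types (a sketch; `my_table` and its `id` column are assumed):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# # With positional parameters, `types` is an Array matching `params`.
# data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(?)",
# params: [[]],
# types: [[:INT64]]
#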
# @example Execute a DDL statement:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "CREATE TABLE my_table (x INT64)"
#
# table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
#
# @example Execute a DML statement:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
#
# puts data.num_dml_affected_rows
#
# @example Run query in a session:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
#
# job.wait_until_done!
#
# session_id = job.session_id
# data = dataset.query "SELECT * FROM temptable", session_id: session_id
#
# @example Query using external data source, set destination:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# csv_url = "gs://bucket/path/to/data.csv"
# csv_table = dataset.external csv_url do |csv|
# csv.autodetect = true
# csv.skip_leading_rows = 1
# end
#
# data = dataset.query "SELECT * FROM my_ext_table" do |query|
# query.external = { my_ext_table: csv_table }
# query.table = dataset.table "my_table", skip_lookup: true
# end
#
# # Iterate over the first page of results
# data.each do |row|
# puts row[:name]
# end
# # Retrieve the next page of results
# data = data.next if data.next?
#
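# @example Limit the page size, then page through all results (a sketch; assumes `my_table` exists):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# data = dataset.query "SELECT name FROM my_table", max: 1000
#
# # Data#all requests subsequent pages as needed.
# data.all do |row|
# puts row[:name]
# end
#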
# @!group Data
#
def query query,
params: nil,
types: nil,
external: nil,
max: nil,
cache: true,
standard_sql: nil,
legacy_sql: nil,
session_id: nil,
format_options_use_int64_timestamp: true,
&block
job = query_job query,
params: params,
types: types,
external: external,
cache: cache,
standard_sql: standard_sql,
legacy_sql: legacy_sql,
session_id: session_id,
&block
job.wait_until_done!
ensure_job_succeeded! job
job.data max: max, format_options_use_int64_timestamp: format_options_use_int64_timestamp
end
##
# Creates a new External::DataSource (or subclass) object that
# represents an external data source that can be queried directly,
# even though the data is not stored in BigQuery. Instead of loading or
# streaming the data, this object references the external data source.
#
# @see https://cloud.google.com/bigquery/external-data-sources Querying
# External Data Sources
#
# @param [String, Array<String>] url The fully-qualified URL(s) that
# point to your data in Google Cloud. An attempt will be made to
# derive the format from the URLs provided.
# @param [String|Symbol] format The data format. This value will be used
# even if the provided URLs are recognized as a different format.
# Optional. See the explicit format example below.
#
# The following values are supported:
#
# * `csv` - CSV
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
# * `avro` - [Avro](http://avro.apache.org/)
# * `sheets` - Google Sheets
# * `datastore_backup` - Cloud Datastore backup
# * `bigtable` - Bigtable
#
# @return [External::DataSource] External data source.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# csv_url = "gs://bucket/path/to/data.csv"
# csv_table = dataset.external csv_url do |csv|
# csv.autodetect = true
# csv.skip_leading_rows = 1
# end
#
# data = dataset.query "SELECT * FROM my_ext_table",
# external: { my_ext_table: csv_table }
#
# data.each do |row|
# puts row[:name]
# end
#
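# @example Set the format explicitly when it cannot be derived from the URL (a sketch; names are illustrative):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# # The ".export" extension is not a recognized format, so name it.
# json_url = "gs://bucket/path/to/data.export"
# json_table = dataset.external json_url, format: :json
#
# data = dataset.query "SELECT * FROM my_ext_table",
# external: { my_ext_table: json_table }
#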
def external url, format: nil
ext = External.from_urls url, format
yield ext if block_given?
ext
end
##
# Loads data into the provided destination table using an asynchronous
# method. In this method, a {LoadJob} is immediately returned. The
# caller may poll the service by repeatedly calling {Job#reload!} and
# {Job#done?} to detect when the job is done, or simply block until the
# job is done by calling {Job#wait_until_done!}. See also {#load}.
#
# For the source of the data, you can pass a Google Cloud Storage file
# path or a google-cloud-storage `File` instance. Or, you can upload a
# file directly. See [Loading Data with a POST
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
#
# The geographic location for the job ("US", "EU", etc.) can be set via
# {LoadJob::Updater#location=} in a block passed to this method. If the
# dataset is a full resource representation (see {#resource_full?}), the
# location of the job will be automatically set to the location of the
# dataset.
#
# @param [String] table_id The destination table to load the data into.
# @param [File, Google::Cloud::Storage::File, String, URI,
# Array<Google::Cloud::Storage::File, String, URI>] files
# A file or the URI of a Google Cloud Storage file, or an Array of
# those, containing data to load into the table.
# @param [String] format The format of the source file(s). The default
# value is `csv`.
#
# The following values are supported:
#
# * `csv` - CSV
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
# * `avro` - [Avro](http://avro.apache.org/)
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
# * `parquet` - [Parquet](https://parquet.apache.org/)
# * `datastore_backup` - Cloud Datastore backup
# @param [String] create Specifies whether the job is allowed to create
# new tables. The default value is `needed`.
#
# The following values are supported:
#
# * `needed` - Create the table if it does not exist.
# * `never` - The table must already exist. A 'notFound' error is
# raised if the table does not exist.
# @param [String] write Specifies how to handle data already present in
# the table. The default value is `append`. See the `write: "truncate"`
# example below.
#
# The following values are supported:
#
# * `truncate` - BigQuery overwrites the table data.
# * `append` - BigQuery appends the data to the table.
# * `empty` - An error will be returned if the table already contains
# data.
# @param [Array<String>] projection_fields If the `format` option is set
# to `datastore_backup`, indicates which entity properties to load
# from a Cloud Datastore backup. Property names are case sensitive and
# must be top-level properties. If not set, BigQuery loads all
# properties. If any named property isn't found in the Cloud Datastore
# backup, an invalid error is returned.
# @param [Boolean] jagged_rows Accept rows that are missing trailing
# optional columns. The missing values are treated as nulls. If
# `false`, records with missing trailing columns are treated as bad
# records, and if there are too many bad records, an invalid error is
# returned in the job result. The default value is `false`. Only
# applicable to CSV, ignored for other formats.
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
# quoted data sections that contain newline characters in a CSV file.
# The default value is `false`.
# @param [Boolean] autodetect Indicates if BigQuery should
# automatically infer the options and schema for CSV and JSON sources.
# The default value is `false`.
# @param [String] encoding The character encoding of the data. The
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
# `UTF-8`.
# @param [String] delimiter Specifies the separator for fields in a CSV
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
# then uses the first byte of the encoded string to split the data in
# its raw, binary state. Default is <code>,</code>.
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
# extra values that are not represented in the table schema. If true,
# the extra values are ignored. If false, records with extra columns
# are treated as bad records, and if there are too many bad records,
# an invalid error is returned in the job result. The default value is
# `false`.
#
# The `format` property determines what BigQuery treats as an extra
# value:
#
# * `CSV`: Trailing columns
# * `JSON`: Named values that don't match any column names
# @param [Integer] max_bad_records The maximum number of bad records
# that BigQuery can ignore when running the job. If the number of bad
# records exceeds this value, an invalid error is returned in the job
# result. The default value is `0`, which requires that all records
# are valid.
# @param [String] null_marker Specifies a string that represents a null
# value in a CSV file. For example, if you specify `\N`, BigQuery
# interprets `\N` as a null value when loading a CSV file. The default
# value is the empty string. If you set this property to a custom
# value, BigQuery throws an error if an empty string is present for
# all data types except for STRING and BYTE. For STRING and BYTE
# columns, BigQuery interprets the empty string as an empty value.
# @param [String] quote The value that is used to quote data sections in
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
# then uses the first byte of the encoded string to split the data in
# its raw, binary state. The default value is a double-quote
# <code>"</code>. If your data does not contain quoted sections, set
# the property value to an empty string. If your data contains quoted
# newline characters, you must also set the allowQuotedNewlines
# property to true.
# @param [Integer] skip_leading The number of rows at the top of a CSV
# file that BigQuery will skip when loading the data. The default
# value is `0`. This property is useful if you have header rows in the
# file that should be skipped.
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
# destination table. Optional. The schema can be omitted if the
# destination table already exists, or if you're loading data from a
# Google Cloud Datastore backup.
#
# See {Project#schema} for the creation of the schema for use with
# this option. Also note that for most use cases, the block yielded by
# this method is a more convenient way to configure the schema.
# @param [String] job_id A user-defined ID for the load job. The ID
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
# `job_id` is provided, then `prefix` will not be used.
#
# See [Generating a job
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
# @param [String] prefix A string, usually human-readable, that will be
# prepended to a generated value to produce a unique job ID. For
# example, the prefix `daily_import_job_` can be given to generate a
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
# be used.
# @param [Hash] labels A hash of user-provided labels associated with
# the job. You can use these to organize and group your jobs.
#
# The labels applied to a resource must meet the following requirements:
#
# * Each resource can have multiple labels, up to a maximum of 64.
# * Each label must be a key-value pair.
# * Keys have a minimum length of 1 character and a maximum length of
# 63 characters, and cannot be empty. Values can be empty, and have
# a maximum length of 63 characters.
# * Keys and values can contain only lowercase letters, numeric characters,
# underscores, and dashes. All characters must use UTF-8 encoding, and
# international characters are allowed.
# * The key portion of a label must be unique. However, you can use the
# same key with multiple resources.
# * Keys must start with a lowercase letter or international character.
# @param [Boolean] dryrun If set, don't actually run this job. However,
# behavior is undefined for non-query jobs and may result in an error.
# Deprecated.
# @param [Boolean] create_session If set to true, a new session will be
# created, and the load job will run on a table created within that
# session. Note: This works only for the `_SESSION` dataset.
# @param [String] session_id The ID of the session in which the load job
# must run.
#
# @yield [updater] A block for setting the schema and other
# options for the destination table. The schema can be omitted if the
# destination table already exists, or if you're loading data from a
# Google Cloud Datastore backup.
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
# updater to modify the load job and its schema.
#
# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# gs_url = "gs://my-bucket/file-name.csv"
# load_job = dataset.load_job "my_new_table", gs_url do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Pass a google-cloud-storage `File` instance:
# require "google/cloud/bigquery"
# require "google/cloud/storage"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# storage = Google::Cloud::Storage.new
# bucket = storage.bucket "my-bucket"
# file = bucket.file "file-name.csv"
# load_job = dataset.load_job "my_new_table", file do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Pass a list of google-cloud-storage files:
# require "google/cloud/bigquery"
# require "google/cloud/storage"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# storage = Google::Cloud::Storage.new
# bucket = storage.bucket "my-bucket"
# file = bucket.file "file-name.csv"
# list = [file, "gs://my-bucket/file-name2.csv"]
# load_job = dataset.load_job "my_new_table", list do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Upload a file directly:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# file = File.open "my_data.csv"
# load_job = dataset.load_job "my_new_table", file do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Schema is not required with a Cloud Datastore backup:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# load_job = dataset.load_job(
# "my_new_table",
# "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
# j.format = "datastore_backup"
# end
#
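# @example Replace existing table data using `write: "truncate"` (a sketch; the table must already exist):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# gs_url = "gs://my-bucket/file-name.csv"
# # `create: "never"` requires the table to exist; its data is overwritten.
# load_job = dataset.load_job "my_table", gs_url,
# create: "never",
# write: "truncate"
#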
# @!group Data
#
def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
null_marker: nil, dryrun: nil, create_session: nil, session_id: nil
ensure_service!
updater = load_job_updater table_id,
format: format, create: create, write: write, projection_fields: projection_fields,
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
delimiter: delimiter, ignore_unknown: ignore_unknown,
max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
autodetect: autodetect, null_marker: null_marker, create_session: create_session,
session_id: session_id
yield updater if block_given?
load_local_or_uri files, updater
end
##
# Loads data into the provided destination table using a synchronous
# method that blocks for a response. Timeouts and transient errors are
# generally handled as needed to complete the job. See also
# {#load_job}.
#
# For the source of the data, you can pass a Google Cloud Storage file
# path or a google-cloud-storage `File` instance. Or, you can upload a
# file directly. See [Loading Data with a POST
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
#
# The geographic location for the job ("US", "EU", etc.) can be set via
# {LoadJob::Updater#location=} in a block passed to this method. If the
# dataset is a full resource representation (see {#resource_full?}), the
# location of the job will be automatically set to the location of the
# dataset.
#
# @param [String] table_id The destination table to load the data into.
# @param [File, Google::Cloud::Storage::File, String, URI,
# Array<Google::Cloud::Storage::File, String, URI>] files
# A file or the URI of a Google Cloud Storage file, or an Array of
# those, containing data to load into the table.
# @param [String] format The format of the source file(s). The default
# value is `csv`.
#
# The following values are supported:
#
# * `csv` - CSV
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
# * `avro` - [Avro](http://avro.apache.org/)
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
# * `parquet` - [Parquet](https://parquet.apache.org/)
# * `datastore_backup` - Cloud Datastore backup
# @param [String] create Specifies whether the job is allowed to create
# new tables. The default value is `needed`.
#
# The following values are supported:
#
# * `needed` - Create the table if it does not exist.
# * `never` - The table must already exist. A 'notFound' error is
# raised if the table does not exist.
# @param [String] write Specifies how to handle data already present in
# the table. The default value is `append`.
#
# The following values are supported:
#
# * `truncate` - BigQuery overwrites the table data.
# * `append` - BigQuery appends the data to the table.
# * `empty` - An error will be returned if the table already contains
# data.
# @param [Array<String>] projection_fields If the `format` option is set
# to `datastore_backup`, indicates which entity properties to load
# from a Cloud Datastore backup. Property names are case sensitive and
# must be top-level properties. If not set, BigQuery loads all
# properties. If any named property isn't found in the Cloud Datastore
# backup, an invalid error is returned.
# @param [Boolean] jagged_rows Accept rows that are missing trailing
# optional columns. The missing values are treated as nulls. If
# `false`, records with missing trailing columns are treated as bad
# records, and if there are too many bad records, an invalid error is
# returned in the job result. The default value is `false`. Only
# applicable to CSV, ignored for other formats.
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
# quoted data sections that contain newline characters in a CSV file.
# The default value is `false`.
# @param [Boolean] autodetect Indicates if BigQuery should
# automatically infer the options and schema for CSV and JSON sources.
# The default value is `false`.
# @param [String] encoding The character encoding of the data. The
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
# `UTF-8`.
# @param [String] delimiter Specifies the separator for fields in a CSV
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
# then uses the first byte of the encoded string to split the data in
# its raw, binary state. Default is <code>,</code>. See the
# tab-separated example below.
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
# extra values that are not represented in the table schema. If true,
# the extra values are ignored. If false, records with extra columns
# are treated as bad records, and if there are too many bad records,
# an invalid error is returned in the job result. The default value is
# `false`.
#
# The `format` property determines what BigQuery treats as an extra
# value:
#
# * `CSV`: Trailing columns
# * `JSON`: Named values that don't match any column names
# @param [Integer] max_bad_records The maximum number of bad records
# that BigQuery can ignore when running the job. If the number of bad
# records exceeds this value, an invalid error is returned in the job
# result. The default value is `0`, which requires that all records
# are valid.
# @param [String] null_marker Specifies a string that represents a null
# value in a CSV file. For example, if you specify `\N`, BigQuery
# interprets `\N` as a null value when loading a CSV file. The default
# value is the empty string. If you set this property to a custom
# value, BigQuery throws an error if an empty string is present for
# all data types except for STRING and BYTE. For STRING and BYTE
# columns, BigQuery interprets the empty string as an empty value.
# @param [String] quote The value that is used to quote data sections in
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
# then uses the first byte of the encoded string to split the data in
# its raw, binary state. The default value is a double-quote
# <code>"</code>. If your data does not contain quoted sections, set
# the property value to an empty string. If your data contains quoted
# newline characters, you must also set the allowQuotedNewlines
# property to true.
# @param [Integer] skip_leading The number of rows at the top of a CSV
# file that BigQuery will skip when loading the data. The default
# value is `0`. This property is useful if you have header rows in the
# file that should be skipped.
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
# destination table. Optional. The schema can be omitted if the
# destination table already exists, or if you're loading data from a
# Google Cloud Datastore backup.
#
# See {Project#schema} for the creation of the schema for use with
# this option. Also note that for most use cases, the block yielded by
# this method is a more convenient way to configure the schema.
# @param [String] session_id The ID of the session in which the load job
# must run.
#
# @yield [updater] A block for setting the schema of the destination
# table and other options for the load job. The schema can be omitted
# if the destination table already exists, or if you're loading data
# from a Google Cloud Datastore backup.
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
# updater to modify the load job and its schema.
#
# @return [Boolean] Returns `true` if the load job was successful.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# gs_url = "gs://my-bucket/file-name.csv"
# dataset.load "my_new_table", gs_url do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Pass a google-cloud-storage `File` instance:
# require "google/cloud/bigquery"
# require "google/cloud/storage"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# storage = Google::Cloud::Storage.new
# bucket = storage.bucket "my-bucket"
# file = bucket.file "file-name.csv"
# dataset.load "my_new_table", file do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Pass a list of google-cloud-storage files:
# require "google/cloud/bigquery"
# require "google/cloud/storage"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# storage = Google::Cloud::Storage.new
# bucket = storage.bucket "my-bucket"
# file = bucket.file "file-name.csv"
# list = [file, "gs://my-bucket/file-name2.csv"]
# dataset.load "my_new_table", list do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Upload a file directly:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# file = File.open "my_data.csv"
# dataset.load "my_new_table", file do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# @example Schema is not required with a Cloud Datastore backup:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# dataset.load "my_new_table",
# "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
# j.format = "datastore_backup"
# end
#
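# @example Load a tab-separated file, skipping its header row (a sketch; the bucket and file are illustrative):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# # `delimiter` and `skip_leading` apply only to CSV-format sources.
# dataset.load "my_table", "gs://my-bucket/data.tsv",
# format: "csv",
# delimiter: "\t",
# skip_leading: 1
#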
# @!group Data
#
def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
job = load_job table_id, files,
format: format, create: create, write: write, projection_fields: projection_fields,
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
null_marker: null_marker, session_id: session_id, &block
job.wait_until_done!
ensure_job_succeeded! job
true
end
##
# Reloads the dataset with current data from the BigQuery service.
#
# @return [Google::Cloud::Bigquery::Dataset] Returns the reloaded
# dataset.
#
# @example Skip retrieving the dataset from the service, then load it:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
# dataset.reload!
#
def reload!
ensure_service!
@gapi = service.get_project_dataset project_id, dataset_id, access_policy_version: @access_policy_version
@reference = nil
@exists = nil
self
end
alias refresh! reload!
##
# Determines whether the dataset exists in the BigQuery service. The
# result is cached locally. To refresh state, set `force` to `true`.
#
# @param [Boolean] force Force the latest resource representation to be
# retrieved from the BigQuery service when `true`. Otherwise the
# return value of this method will be memoized to reduce the number of
# API calls made to the BigQuery service. The default is `false`.
#
# @return [Boolean] `true` when the dataset exists in the BigQuery
# service, `false` otherwise.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
# dataset.exists? # true
#
def exists? force: false
return gapi_exists? if force
# If we have a memoized value, return it
return @exists unless @exists.nil?
# Always true if we have a gapi object
return true if resource?
gapi_exists?
end
##
# Whether the dataset was created without retrieving the resource
# representation from the BigQuery service.
#
# @return [Boolean] `true` when the dataset is just a local reference
# object, `false` otherwise.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
#
# dataset.reference? # true
# dataset.reload!
# dataset.reference? # false
#
def reference?
@gapi.nil?
end
##
# Whether the dataset was created with a resource representation from
# the BigQuery service.
#
# @return [Boolean] `true` when the dataset was created with a resource
# representation, `false` otherwise.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
#
# dataset.resource? # false
# dataset.reload!
# dataset.resource? # true
#
def resource?
!@gapi.nil?
end
##
# Whether the dataset was created with a partial resource representation
# from the BigQuery service by retrieval through {Project#datasets}.
# See [Datasets: list
# response](https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#response)
# for the contents of the partial representation. Accessing any
# attribute outside of the partial representation will result in loading
# the full representation.
#
# @return [Boolean] `true` when the dataset was created with a partial
# resource representation, `false` otherwise.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.datasets.first
#
# dataset.resource_partial? # true
# dataset.description # Loads the full resource.
# dataset.resource_partial? # false
#
def resource_partial?
@gapi.is_a? Google::Apis::BigqueryV2::DatasetList::Dataset
end
##
# Whether the dataset was created with a full resource representation
# from the BigQuery service.
#
# @return [Boolean] `true` when the dataset was created with a full
# resource representation, `false` otherwise.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset"
#
# dataset.resource_full? # true
#
def resource_full?
@gapi.is_a? Google::Apis::BigqueryV2::Dataset
end
##
# @private New Dataset from a Google API Client object.
def self.from_gapi gapi, conn, access_policy_version: nil
new.tap do |f|
f.gapi = gapi
f.service = conn
f.access_policy_version = access_policy_version
end
end
##
# @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
def self.new_reference project_id, dataset_id, service
raise ArgumentError, "dataset_id is required" unless dataset_id
new.tap do |b|
reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
project_id: project_id, dataset_id: dataset_id
b.service = service
b.instance_variable_set :@reference, reference_gapi
end
end
##
# Inserts data into the given table for near-immediate querying, without
# the need to complete a load operation before the data can appear in
# query results.
#
# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
# complex types (see the examples below):
#
# | BigQuery | Ruby | Notes |
# |--------------|--------------------------------------|----------------------------------------------------|
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
# | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
# | `DATE` | `Date` | |
# | `GEOGRAPHY` | `String` | |
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
# | `TIMESTAMP` | `Time` | |
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
# | `ARRAY`      | `Array`                              | Nested arrays and `nil` values are not supported.  |
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
#
# Because BigQuery's streaming API is designed for high insertion rates,
# modifications to the underlying table metadata are eventually
# consistent when interacting with the streaming system. In most cases
# metadata changes are propagated within minutes, but during this period
# API responses may reflect the inconsistent state of the table.
#
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
# Streaming Data Into BigQuery
#
# @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
# BigQuery Troubleshooting: Metadata errors for streaming inserts
#
# @param [String] table_id The ID of the destination table.
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
# containing the data. Required. `BigDecimal` values will be rounded to
# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
# instead of `BigDecimal`.
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
# not provided, the client library will assign a UUID to each row before the request is sent.
#
# The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
# ID for a specific row in the array. See the `insert_ids` example below.
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
# if invalid rows exist. The default value is `false`, which causes
# the entire request to fail if any invalid rows exist.
# @param [Boolean] ignore_unknown Accept rows that contain values that
# do not match the schema. The unknown values are ignored. Default is
# false, which treats unknown values as errors.
# @param [Boolean] autocreate Specifies whether the method should create
# a new table with the given `table_id`, if no table is found for
# `table_id`. The default value is false.
#
# @yield [table] a block for setting the table
# @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
# to set additional properties on the table in the API request to
# create it. Only used when `autocreate` is set and the table does not
# already exist.
#
# @return [Google::Cloud::Bigquery::InsertResponse] An insert response
# object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# dataset.insert "my_table", rows
#
# @example Avoid retrieving the dataset with `skip_lookup`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# dataset.insert "my_table", rows
#
# @example Using `autocreate` to create a new table if none exists:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# dataset.insert "my_table", rows, autocreate: true do |t|
# t.schema.string "first_name", mode: :required
# t.schema.integer "age", mode: :required
# end
#
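# @example Provide `insert_ids` for best-effort de-duplication (a sketch; the ID values are illustrative):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# # One ID per row; retried requests reusing these IDs are de-duplicated.
# dataset.insert "my_table", rows, insert_ids: ["alice-1", "bob-1"]
#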
# @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# row = {
# "my_numeric" => BigDecimal("123456798.987654321"),
# "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
# }
# dataset.insert "my_table", row
#
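# @example Insert a row using the Ruby-to-BigQuery type mapping above (a sketch; assumes a matching table schema):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# row = {
# "name" => "Alice", # STRING
# "joined_at" => Time.now, # TIMESTAMP
# "birthday" => Date.new(1990, 1, 1), # DATE
# "scores" => [98, 99], # ARRAY<INT64>
# "address" => { "city" => "Topeka" } # STRUCT
# }
# dataset.insert "my_table", row
#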
# @!group Data
#
def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
rows = [rows] if rows.is_a? Hash
raise ArgumentError, "No rows provided" if rows.empty?
insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
insert_ids = Array insert_ids
if insert_ids.count.positive? && insert_ids.count != rows.count
raise ArgumentError, "insert_ids must be the same size as rows"
end
if autocreate
insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
insert_ids: insert_ids, &block
else
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
insert_ids: insert_ids
end
end
##
# Create an asynchronous inserter object used to insert rows in batches.
#
# @param [String] table_id The ID of the table to insert rows into.
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
# if invalid rows exist. The default value is `false`, which causes
# the entire request to fail if any invalid rows exist.
# @param [Boolean] ignore_unknown Accept rows that contain values that
# do not match the schema. The unknown values are ignored. Default is
# false, which treats unknown values as errors.
# @param [Integer] max_bytes The maximum size of rows to be
# collected before the batch is published. Default is 10,000,000
# (10MB).
# @param [Integer] max_rows The maximum number of rows to be collected
# before the batch is published. Default is 500.
# @param [Numeric] interval The number of seconds to collect
# rows before the batch is published. Default is 10.
# @param [Numeric] threads The number of threads used to insert
# batches of rows. Default is 4. See the batching example below.
# @param [String] view Specifies the view that determines which table information is returned.
# By default, basic table information and storage statistics (STORAGE_STATS) are returned.
# Accepted values include `:unspecified`, `:basic`, `:storage`, and
# `:full`. For more information, see [BigQuery Classes](@todo: Update the link).
# The default value is the `:unspecified` view type.
# @yield [response] the callback for when a batch of rows is inserted
# @yieldparam [Table::AsyncInserter::Result] result the result of the
# asynchronous insert
#
# @return [Table::AsyncInserter] Returns an inserter object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
# inserter = dataset.insert_async "my_table" do |result|
# if result.error?
# log_error result.error
# else
# log_insert "inserted #{result.insert_count} rows " \
# "with #{result.error_count} errors"
# end
# end
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# inserter.insert rows
#
# inserter.stop.wait!
#
# @example Avoid retrieving transient stats of the table while inserting:
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
# inserter = dataset.insert_async("my_table", view: "basic") do |result|
# if result.error?
# log_error result.error
# else
# log_insert "inserted #{result.insert_count} rows " \
# "with #{result.error_count} errors"
# end
# end
#
# rows = [
# { "first_name" => "Alice", "age" => 21 },
# { "first_name" => "Bob", "age" => 22 }
# ]
# inserter.insert rows
#
# inserter.stop.wait!
#
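# @example Tune the batching thresholds (a sketch; the values are illustrative):
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
#
# # Publish a batch after 100 rows or 5 seconds, whichever comes first.
# inserter = dataset.insert_async "my_table", max_rows: 100, interval: 5 do |result|
# puts "inserted #{result.insert_count} rows" unless result.error?
# end
#
# inserter.insert [{ "first_name" => "Alice", "age" => 21 }]
#
# inserter.stop.wait!
#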
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
interval: 10, threads: 4, view: nil, &block
ensure_service!
# Get table, don't use Dataset#table which handles NotFoundError
gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: view
table = Table.from_gapi gapi, service, metadata_view: view
# Get the AsyncInserter from the table
table.insert_async skip_invalid: skip_invalid,
ignore_unknown: ignore_unknown,
max_bytes: max_bytes, max_rows: max_rows,
interval: interval, threads: threads, &block
end
##
# Build an object of type Google::Apis::BigqueryV2::DatasetAccessEntry from
# this dataset.
#
# @param [Array<String>] target_types The list of target types within the dataset.
#
# @return [Google::Apis::BigqueryV2::DatasetAccessEntry] Returns a DatasetAccessEntry object.
#
# @example
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
# dataset = bigquery.dataset "my_dataset"
# dataset_access_entry = dataset.build_access_entry target_types: ["VIEWS"]
#
def build_access_entry target_types: nil
params = {
dataset: dataset_ref,
target_types: target_types
}.compact
Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
end
protected
def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
rescue Google::Cloud::NotFoundError
# Add jitter so concurrent inserters don't all attempt creation at once.
sleep rand(1..60)
begin
create_table table_id do |tbl_updater|
yield tbl_updater if block_given?
end
rescue Google::Cloud::AlreadyExistsError
# Do nothing if it already exists
end
# Wait for the new table to become available to the streaming system,
# then retry the insert from the top of the method.
sleep 60
retry
end
def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
rows = [rows] if rows.is_a? Hash
raise ArgumentError, "No rows provided" if rows.empty?
ensure_service!
gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
ignore_unknown: ignore_unknown,
insert_ids: insert_ids,
project_id: project_id
InsertResponse.from_gapi rows, gapi
end
##
# Raise an error unless an active service is available.
def ensure_service!
raise "Must have active connection" unless service
end
##
# Ensures the Google::Apis::BigqueryV2::Dataset object has been loaded
# from the service.
def ensure_gapi!
ensure_service!
return unless reference?
reload!
end
##
# Fetch gapi and memoize whether resource exists.
def gapi_exists?
reload!
@exists = true
rescue Google::Cloud::NotFoundError
@exists = false
end
def patch_gapi! *attributes
return if attributes.empty?
ensure_service!
patch_args = attributes.to_h { |attr| [attr, @gapi.send(attr)] }
patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
patch_gapi.etag = etag if etag
@gapi = service.patch_dataset dataset_id, patch_gapi, access_policy_version: @access_policy_version
end
##
# Load the complete representation of the dataset if it has been
# only partially loaded by a request to the API list method.
def ensure_full_data!
reload! unless resource_full?
end
def ensure_job_succeeded! job
return unless job.failed?
begin
# raise to activate ruby exception cause handling
raise job.gapi_error
rescue StandardError => e
# wrap Google::Apis::Error with Google::Cloud::Error
raise Google::Cloud::Error.from_error(e)
end
end
def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
job_ref = service.job_ref_from job_id, prefix
Google::Apis::BigqueryV2::Job.new(
job_reference: job_ref,
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
destination_table: Google::Apis::BigqueryV2::TableReference.new(
project_id: project_id,
dataset_id: dataset_id,
table_id: table_id
)
),
dry_run: dryrun
)
)
end
def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
skip_leading: nil, null_marker: nil
job.jagged_rows = jagged_rows unless jagged_rows.nil?
job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
job.delimiter = delimiter unless delimiter.nil?
job.null_marker = null_marker unless null_marker.nil?
job.quote = quote unless quote.nil?
job.skip_leading = skip_leading unless skip_leading.nil?
end
def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
skip_leading: nil, null_marker: nil
job.format = format unless format.nil?
job.projection_fields = projection_fields unless projection_fields.nil?
job.encoding = encoding unless encoding.nil?
job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
job.max_bad_records = max_bad_records unless max_bad_records.nil?
load_job_csv_options! job, jagged_rows: jagged_rows,
quoted_newlines: quoted_newlines,
delimiter: delimiter,
quote: quote,
skip_leading: skip_leading,
null_marker: null_marker
end
def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
prefix: nil, labels: nil, autodetect: nil, null_marker: nil, create_session: nil,
session_id: nil
new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
LoadJob::Updater.new(new_job).tap do |job|
job.location = location if location # may be dataset reference
job.create = create unless create.nil?
job.write = write unless write.nil?
job.schema = schema unless schema.nil?
job.autodetect = autodetect unless autodetect.nil?
job.labels = labels unless labels.nil?
job.create_session = create_session unless create_session.nil?
job.session_id = session_id unless session_id.nil?
load_job_file_options! job, format: format,
projection_fields: projection_fields,
jagged_rows: jagged_rows,
quoted_newlines: quoted_newlines,
encoding: encoding,
delimiter: delimiter,
ignore_unknown: ignore_unknown,
max_bad_records: max_bad_records,
quote: quote,
skip_leading: skip_leading,
null_marker: null_marker
end
end
def load_storage urls, job_gapi
# Convert to storage URL
urls = [urls].flatten.map do |url|
if url.respond_to? :to_gs_url
url.to_gs_url
elsif url.is_a? URI
url.to_s
else
url
end
end
unless urls.nil?
job_gapi.configuration.load.update! source_uris: urls
if job_gapi.configuration.load.source_format.nil?
source_format = Convert.derive_source_format_from_list urls
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
end
end
gapi = service.load_table_gs_url job_gapi
Job.from_gapi gapi, service
end
def load_local file, job_gapi
path = Pathname(file).to_path
if job_gapi.configuration.load.source_format.nil?
source_format = Convert.derive_source_format path
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
end
gapi = service.load_table_file file, job_gapi
Job.from_gapi gapi, service
end
def load_local_or_uri file, updater
job_gapi = updater.to_gapi
if local_file? file
load_local file, job_gapi
else
load_storage file, job_gapi
end
end
def storage_url? files
[files].flatten.all? do |file|
file.respond_to?(:to_gs_url) ||
(file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
(file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
end
end
def local_file? file
::File.file? file
rescue StandardError
false
end
def udfs_gapi array_or_str
return [] if array_or_str.nil?
Array(array_or_str).map do |uri_or_code|
resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
if uri_or_code.start_with? "gs://"
resource.resource_uri = uri_or_code
else
resource.inline_code = uri_or_code
end
resource
end
end
##
# Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
class Updater < Dataset
##
# @private A list of attributes that were updated.
attr_reader :updates
##
# @private Create an Updater object.
def initialize gapi
super()
@updates = []
@gapi = gapi
end
def access
# TODO: make sure to call ensure_full_data! on Dataset#update
@access ||= Access.from_gapi @gapi
if block_given?
yield @access
check_for_mutated_access!
end
# Same as Dataset#access, but not frozen
@access
end
##
# @raise [RuntimeError] not implemented
def delete(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def create_table(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def create_view(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def create_materialized_view(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def table(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def tables(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def model(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def models(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def create_routine(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def routine(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def routines(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def query_job(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def query(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def external(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def load_job(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def load(*)
raise "not implemented in #{self.class}"
end
##
# @raise [RuntimeError] not implemented
def reload!
raise "not implemented in #{self.class}"
end
alias refresh! reload!
##
# @private Make sure any access changes are saved
def check_for_mutated_access!
return if @access.nil?
return unless @access.changed?
@gapi.update! access: @access.to_gapi
patch_gapi! :access
end
##
# @private
def to_gapi
check_for_mutated_access!
@gapi
end
protected
##
# Queue up all the updates instead of making them.
def patch_gapi! attribute
@updates << attribute
@updates.uniq!
end
end
end
end
end
end