class Google::Cloud::Bigquery::Dataset


# Dataset
#
# Represents a Dataset. A dataset is a grouping mechanism that holds zero
# or more tables. Datasets are the lowest level unit of access control;
# you cannot control access at the table level. A dataset is contained
# within a specific project.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   dataset = bigquery.create_dataset "my_dataset",
#                                     name: "My Dataset",
#                                     description: "This is my Dataset"

def self.from_gapi gapi, conn, access_policy_version: nil, dataset_view: nil

Other tags:
    Private: - New Dataset from a Google API Client object.
##
# @private New Dataset from a Google API Client object.
def self.from_gapi gapi, conn, access_policy_version: nil, dataset_view: nil
  dataset = new
  dataset.gapi = gapi
  dataset.service = conn
  dataset.access_policy_version = access_policy_version
  dataset.dataset_view = dataset_view
  dataset
end

def self.new_reference project_id, dataset_id, service

Other tags:
    Private: - New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
##
# @private New lazy Dataset object without making an HTTP request, for
# use with the skip_lookup option.
def self.new_reference project_id, dataset_id, service
  raise ArgumentError, "dataset_id is required" unless dataset_id
  ref = Google::Apis::BigqueryV2::DatasetReference.new project_id: project_id,
                                                       dataset_id: dataset_id
  dataset = new
  dataset.service = service
  dataset.instance_variable_set :@reference, ref
  dataset
end

def access

Other tags:
    Example: Manage the access rules by passing a block: -

Returns:
  • (Google::Cloud::Bigquery::Dataset::Access) - The access object.

Other tags:
    Yieldparam: access - the object accepting rules

Other tags:
    Yield: - a block for setting rules

Other tags:
    See: https://cloud.google.com/bigquery/access-control - BigQuery Access
##
# Retrieves the access rules for the dataset, reloading the full resource
# representation first when necessary. When a block is given, the rules
# may be modified through the yielded builder; if the builder reports
# changes they are persisted with a patch request before the frozen
# access object is returned.
#
# @yield [access] a block for setting rules
# @yieldparam [Google::Cloud::Bigquery::Dataset::Access] access the object accepting rules
# @return [Google::Cloud::Bigquery::Dataset::Access] The frozen access object.
def access
  # ensure_full_data! is defined as `reload! unless resource_full?`, so the
  # former second `reload! unless resource_full?` line here was a no-op
  # duplicate and has been removed.
  ensure_full_data!
  access_builder = Access.from_gapi @gapi
  if block_given?
    yield access_builder
    if access_builder.changed?
      @gapi.update! access: access_builder.to_gapi
      patch_gapi! :access
    end
  end
  access_builder.freeze
end

def api_url

Returns:
  • (String, nil) - A REST URL for the resource, or `nil` if the
##
# A REST URL for the resource.
#
# @return [String, nil] the `self_link` of the resource, or `nil` when
#   the object is only a reference.
def api_url
  unless reference?
    ensure_full_data!
    @gapi.self_link
  end
end

def build_access_entry target_types: nil

Returns:
  • (Google::Apis::BigqueryV2::DatasetAccessEntry) - Returns a DatasetAccessEntry object.

Parameters:
  • target_types (Array) -- The list of target types within the dataset.
##
# Builds a DatasetAccessEntry for this dataset.
#
# @param target_types [Array, nil] the list of target types within the
#   dataset; omitted from the entry when nil.
# @return [Google::Apis::BigqueryV2::DatasetAccessEntry] the new entry.
def build_access_entry target_types: nil
  entry_params = { dataset: dataset_ref, target_types: target_types }.compact
  Google::Apis::BigqueryV2::DatasetAccessEntry.new(**entry_params)
end

def create_materialized_view table_id,

Other tags:
    Example: Automatic refresh can be disabled: -

Returns:
  • (Google::Cloud::Bigquery::Table) - A new table object.

Parameters:
  • refresh_interval_ms (Integer) -- The maximum frequency in milliseconds at which this materialized view
  • enable_refresh (Boolean) -- Enable automatic refresh of the materialized view when the base table is
  • description (String) -- A user-friendly description of the table.
  • name (String) -- A descriptive name for the table.
  • query (String) -- The query that BigQuery executes when the materialized view is referenced.
  • table_id (String) -- The ID of the materialized view table. The ID must contain only letters (`[A-Za-z]`),

Other tags:
    See: https://cloud.google.com/bigquery/docs/materialized-views-intro - Introduction to materialized views
##
# Creates a new materialized view in the dataset.
#
# @param table_id [String] the ID of the materialized view table.
# @param query [String] the query BigQuery executes when the view is referenced.
# @param name [String, nil] a descriptive name for the table.
# @param description [String, nil] a user-friendly description.
# @param enable_refresh [Boolean, nil] enable automatic refresh.
# @param refresh_interval_ms [Integer, nil] maximum refresh frequency in ms.
# @return [Google::Cloud::Bigquery::Table] the new table object.
def create_materialized_view table_id,
                             query,
                             name: nil,
                             description: nil,
                             enable_refresh: nil,
                             refresh_interval_ms: nil
  table_ref = Google::Apis::BigqueryV2::TableReference.new(
    project_id: project_id, dataset_id: dataset_id, table_id: table_id
  )
  mv_definition = Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
    query: query,
    enable_refresh: enable_refresh,
    refresh_interval_ms: refresh_interval_ms
  )
  view_opts = {
    table_reference: table_ref,
    friendly_name: name,
    description: description,
    materialized_view: mv_definition
  }.compact
  gapi = service.insert_table dataset_id, Google::Apis::BigqueryV2::Table.new(**view_opts)
  Table.from_gapi gapi, service
end

def create_routine routine_id

Other tags:
    Example: Extended example: -

Returns:
  • (Google::Cloud::Bigquery::Routine) - A new routine object.

Other tags:
    Yieldparam: routine - An updater to set additional properties on the

Other tags:
    Yield: - A block for setting properties on the routine.

Parameters:
  • routine_id (String) -- The ID of the routine. The ID must contain only
##
# Creates a new routine in the dataset.
#
# @param routine_id [String] the ID of the routine.
# @yield [routine] a block for setting properties on the routine.
# @return [Google::Cloud::Bigquery::Routine] the new routine object.
def create_routine routine_id
  ensure_service!
  routine_ref = Google::Apis::BigqueryV2::RoutineReference.new(
    project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
  )
  updater = Routine::Updater.new Google::Apis::BigqueryV2::Routine.new(routine_reference: routine_ref)
  yield updater if block_given?
  gapi = service.insert_routine dataset_id, updater.to_gapi
  Routine.from_gapi gapi, service
end

def create_table table_id, name: nil, description: nil

Other tags:
    Example: With range partitioning. -
    Example: With time partitioning and clustering. -
    Example: You can define the schema using a nested block. -
    Example: Or the table's schema can be configured with the block. -
    Example: You can also pass name and description options. -

Returns:
  • (Google::Cloud::Bigquery::Table) - A new table object.

Other tags:
    Yieldparam: table - An updater

Other tags:
    Yield: - a block for setting the table

Parameters:
  • description (String) -- A user-friendly description of the table.
  • name (String) -- A descriptive name for the table.
  • table_id (String) -- The ID of the table. The ID must contain only
##
# Creates a new table in the dataset.
#
# @param table_id [String] the ID of the table.
# @param name [String, nil] a descriptive name for the table.
# @param description [String, nil] a user-friendly description.
# @yield [table] a block for configuring the table before creation.
# @return [Google::Cloud::Bigquery::Table] the new table object.
def create_table table_id, name: nil, description: nil
  ensure_service!
  table_ref = Google::Apis::BigqueryV2::TableReference.new(
    project_id: project_id, dataset_id: dataset_id, table_id: table_id
  )
  updater = Table::Updater.new Google::Apis::BigqueryV2::Table.new(table_reference: table_ref)
  updater.name = name unless name.nil?
  updater.description = description unless description.nil?
  yield updater if block_given?
  gapi = service.insert_table dataset_id, updater.to_gapi
  Table.from_gapi gapi, service
end

def create_view table_id,

Other tags:
    Example: A name and description can be provided: -

Returns:
  • (Google::Cloud::Bigquery::Table) - A new table object.

Parameters:
  • udfs (Array, String) -- User-defined function resources
  • legacy_sql (Boolean) -- Specifies whether to use BigQuery's
  • standard_sql (Boolean) -- Specifies whether to use BigQuery's
  • description (String) -- A user-friendly description of the table.
  • name (String) -- A descriptive name for the table.
  • query (String) -- The query that BigQuery executes when the view
  • table_id (String) -- The ID of the view table. The ID must contain

Other tags:
    See: https://cloud.google.com/bigquery/docs/views - Creating views
##
# Creates a new view, a virtual table defined by the given SQL query.
#
# @param table_id [String] the ID of the view table.
# @param query [String] the query BigQuery executes when the view is referenced.
# @param name [String, nil] a descriptive name for the table.
# @param description [String, nil] a user-friendly description.
# @param standard_sql [Boolean, nil] whether to use standard SQL.
# @param legacy_sql [Boolean, nil] whether to use legacy SQL.
# @param udfs [Array, String, nil] user-defined function resources.
# @return [Google::Cloud::Bigquery::Table] the new table object.
def create_view table_id,
                query,
                name: nil,
                description: nil,
                standard_sql: nil,
                legacy_sql: nil,
                udfs: nil
  use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
  table_ref = Google::Apis::BigqueryV2::TableReference.new(
    project_id: project_id, dataset_id: dataset_id, table_id: table_id
  )
  view_definition = Google::Apis::BigqueryV2::ViewDefinition.new(
    query: query,
    use_legacy_sql: use_legacy_sql,
    user_defined_function_resources: udfs_gapi(udfs)
  )
  view_opts = {
    table_reference: table_ref,
    friendly_name: name,
    description: description,
    view: view_definition
  }.compact
  gapi = service.insert_table dataset_id, Google::Apis::BigqueryV2::Table.new(**view_opts)
  Table.from_gapi gapi, service
end

def created_at

Returns:
  • (Time, nil) - The creation time, or `nil` if not present or the
##
# The time the dataset was created.
#
# @return [Time, nil] the creation time, or `nil` when the object is a
#   reference.
def created_at
  unless reference?
    ensure_full_data!
    Convert.millis_to_time @gapi.creation_time
  end
end

def dataset_id

Returns:
  • (String) - The ID must contain only letters (`[A-Za-z]`), numbers
##
# The dataset's unique ID.
#
# @return [String] the dataset ID, read from the reference when this is
#   a lazy reference object, otherwise from the loaded resource.
def dataset_id
  reference? ? reference.dataset_id : @gapi.dataset_reference.dataset_id
end

def dataset_ref

Other tags:
    Private: -
##
# @private The dataset reference, converted to a Hash when the reference
# object supports it.
def dataset_ref
  ref = reference? ? reference : @gapi.dataset_reference
  ref.respond_to?(:to_h) ? ref.to_h : ref
end

def default_collation

Returns:
  • (String, nil) - The default collation, or `nil` if not present or the object is a
##
# The default collation of the dataset.
#
# @return [String, nil] the default collation, or `nil` when the object
#   is a reference.
def default_collation
  unless reference?
    ensure_full_data!
    @gapi.default_collation
  end
end

def default_collation= new_default_collation

Parameters:
  • new_default_collation (String) -- The new default collation for the dataset.
##
# Updates the default collation for the dataset.
#
# @param new_default_collation [String] the new default collation.
def default_collation= new_default_collation
  # ensure_full_data! performs `reload! unless resource_full?`.
  ensure_full_data!
  @gapi.update! default_collation: new_default_collation
  patch_gapi! :default_collation
end

def default_encryption

Returns:
  • (EncryptionConfiguration, nil) - The default encryption

Other tags:
    See: https://cloud.google.com/bigquery/docs/customer-managed-encryption -
##
# The default encryption configuration of the dataset.
#
# @return [EncryptionConfiguration, nil] a frozen configuration, or `nil`
#   when unset or the object is a reference.
def default_encryption
  return nil if reference?
  ensure_full_data!
  config = @gapi.default_encryption_configuration
  config && EncryptionConfiguration.from_gapi(config).freeze
end

def default_encryption= value

Parameters:
  • value (EncryptionConfiguration) -- The new encryption config.

Other tags:
    See: https://cloud.google.com/bigquery/docs/customer-managed-encryption -
##
# Updates the default encryption configuration for the dataset.
#
# @param value [EncryptionConfiguration] the new encryption config.
def default_encryption= value
  reload! unless resource_full?
  @gapi.default_encryption_configuration = value.to_gapi
  patch_gapi! :default_encryption_configuration
end

def default_expiration

Returns:
  • (Integer, nil) - The default table expiration in milliseconds,
##
# The default lifetime of the dataset's tables.
#
# @return [Integer, nil] the default table expiration in milliseconds,
#   or `nil` when unset/unparseable or when the object is a reference.
def default_expiration
  return nil if reference?
  ensure_full_data!
  raw_ms = @gapi.default_table_expiration_ms
  begin
    Integer raw_ms
  rescue StandardError
    # nil or non-numeric values are reported as "no expiration"
    nil
  end
end

def default_expiration= new_default_expiration

Parameters:
  • new_default_expiration (Integer) -- The new default table
##
# Updates the default table expiration for the dataset.
#
# @param new_default_expiration [Integer] the new default expiration in ms.
def default_expiration= new_default_expiration
  # ensure_full_data! performs `reload! unless resource_full?`.
  ensure_full_data!
  @gapi.update! default_table_expiration_ms: new_default_expiration
  patch_gapi! :default_table_expiration_ms
end

def delete force: nil

Returns:
  • (Boolean) - Returns `true` if the dataset was deleted.

Parameters:
  • force (Boolean) -- If `true`, delete all the tables in the
##
# Permanently deletes the dataset.
#
# @param force [Boolean, nil] if `true`, delete all the tables in the
#   dataset as well.
# @return [Boolean] `true` if the dataset was deleted.
def delete force: nil
  ensure_service!
  service.delete_dataset dataset_id, force
  @exists = false # memoize non-existence for #exists?
  true
end

def description

Returns:
  • (String, nil) - The description, or `nil` if the object is a
##
# The description of the dataset.
#
# @return [String, nil] the description, or `nil` when the object is a
#   reference.
def description
  unless reference?
    ensure_full_data!
    @gapi.description
  end
end

def description= new_description

Parameters:
  • new_description (String) -- The new description for the dataset.
##
# Updates the description of the dataset.
#
# @param new_description [String] the new description.
def description= new_description
  # ensure_full_data! performs `reload! unless resource_full?`.
  ensure_full_data!
  @gapi.update! description: new_description
  patch_gapi! :description
end

def ensure_full_data!

only partially loaded by a request to the API list method.
Load the complete representation of the dataset if it has been
#
##
# Load the complete representation of the dataset if it has been
# only partially loaded by a request to the API list method.
def ensure_full_data!
  return if resource_full?
  reload!
end

def ensure_gapi!

from the service.
Ensures the Google::Apis::BigqueryV2::Dataset object has been loaded
#
##
# Ensures the Google::Apis::BigqueryV2::Dataset object has been loaded
# from the service.
def ensure_gapi!
  ensure_service!
  reload! if reference?
end

def ensure_job_succeeded! job

##
# Raises if the given job finished in the failed state; does nothing
# otherwise. The begin/rescue deliberately wraps only the raise so that
# failures from `job.failed?` itself propagate unwrapped.
#
# @raise [Google::Cloud::Error] wrapping the job's underlying API error.
def ensure_job_succeeded! job
  return unless job.failed?
  begin
    # raise to activate ruby exception cause handling
    raise job.gapi_error
  rescue StandardError => e
    # wrap Google::Apis::Error with Google::Cloud::Error
    raise Google::Cloud::Error.from_error(e)
  end
end

def ensure_service!

Raise an error unless an active service is available.
#
##
# Raise an error unless an active service is available.
def ensure_service!
  return if service
  raise "Must have active connection"
end

def etag

Returns:
  • (String, nil) - The ETag hash, or `nil` if the object is a
##
# The ETag hash of the dataset.
#
# @return [String, nil] the ETag, or `nil` when the object is a reference.
def etag
  unless reference?
    ensure_full_data!
    @gapi.etag
  end
end

def exists? force: false

Returns:
  • (Boolean) - `true` when the dataset exists in the BigQuery

Parameters:
  • force (Boolean) -- Force the latest resource representation to be
##
# Whether the dataset exists in the BigQuery service.
#
# @param force [Boolean] force a fresh API lookup instead of using the
#   memoized answer.
# @return [Boolean] `true` when the dataset exists.
def exists? force: false
  return gapi_exists? if force
  # Use the memoized answer when we have one.
  return @exists unless @exists.nil?
  if resource?
    # Having a full resource object implies existence.
    true
  else
    gapi_exists?
  end
end

def external url, format: nil

Returns:
  • (External::DataSource) - External data source.

Parameters:
  • format (String|Symbol) -- The data format. This value will be used
  • url (String, Array) -- The fully-qualified URL(s) that

Other tags:
    See: https://cloud.google.com/bigquery/external-data-sources - Querying
##
# Builds an external data source definition for the given URL(s).
#
# @param url [String, Array] the fully-qualified URL(s) of the data.
# @param format [String, Symbol, nil] the data format.
# @yield [ext] a block for configuring the data source.
# @return [External::DataSource] the external data source.
def external url, format: nil
  data_source = External.from_urls url, format
  yield data_source if block_given?
  data_source
end

def gapi_exists?

Fetch gapi and memoize whether resource exists.
#
##
# Fetch gapi and memoize whether resource exists. Reloads the resource;
# memoizes `true` on success and `false` when the API responds with
# not-found. The method-level rescue covers the whole body.
def gapi_exists?
  reload!
  @exists = true
rescue Google::Cloud::NotFoundError
  @exists = false
end

def initialize

Other tags:
    Private: - Create an empty Dataset object.
##
# @private Create an empty Dataset object; all state starts unset.
def initialize
  @service = @gapi = @reference = @access_policy_version = @dataset_view = nil
end

def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block

Other tags:
    Example: Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`: -
    Example: Using `autocreate` to create a new table if none exists. -
    Example: Avoid retrieving the dataset with `skip_lookup`: -

Returns:
  • (Google::Cloud::Bigquery::InsertResponse) - An insert response

Other tags:
    Yieldparam: table - An updater

Other tags:
    Yield: - a block for setting the table

Parameters:
  • autocreate (Boolean) -- Specifies whether the method should create
  • ignore_unknown (Boolean) -- Accept rows that contain values that
  • skip_invalid (Boolean) -- Insert all valid rows of a request, even
  • insert_ids (Array, Symbol) -- A unique ID for each row. BigQuery uses this property to
  • rows (Hash, Array) -- A hash object or array of hash objects
  • table_id (String) -- The ID of the destination table.

Other tags:
    See: https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts -
    See: https://cloud.google.com/bigquery/streaming-data-into-bigquery -
##
# Streams rows into the given table.
#
# @param table_id [String] the ID of the destination table.
# @param rows [Hash, Array] a hash or array of hashes to insert.
# @param insert_ids [Array, Symbol, nil] unique IDs per row, or `:skip`.
# @param skip_invalid [Boolean, nil] insert valid rows even if some are invalid.
# @param ignore_unknown [Boolean, nil] accept rows with unknown values.
# @param autocreate [Boolean, nil] create the table if it does not exist.
# @raise [ArgumentError] when no rows are given or insert_ids size mismatches.
# @return [Google::Cloud::Bigquery::InsertResponse] the insert response.
def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
  rows = [rows] if rows.is_a? Hash
  raise ArgumentError, "No rows provided" if rows.empty?
  insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
  insert_ids = Array insert_ids
  unless insert_ids.empty? || insert_ids.count == rows.count
    raise ArgumentError, "insert_ids must be the same size as rows"
  end
  insert_opts = { skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids }
  if autocreate
    insert_data_with_autocreate table_id, rows, **insert_opts, &block
  else
    insert_data table_id, rows, **insert_opts
  end
end

def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,

Other tags:
    Example: Avoid retrieving transient stats of the table with while inserting : -

Returns:
  • (Table::AsyncInserter) - Returns an inserter object.

Other tags:
    Yieldparam: result - the result of the

Other tags:
    Yield: - the callback for when a batch of rows is inserted

Parameters:
  • view (String) -- Specifies the view that determines which table information is returned.
  • max_rows (Integer) -- The maximum number of rows to be collected
  • ignore_unknown (Boolean) -- Accept rows that contain values that
  • skip_invalid (Boolean) -- Insert all valid rows of a request, even
  • table_id (String) -- The ID of the table to insert rows into.

Other tags:
    Attr_reader: threads - The number of threads used to insert
    Attr_reader: interval - The number of seconds to collect
    Attr_reader: max_bytes - The maximum size of rows to be
##
# Creates an asynchronous inserter for the given table.
#
# @param table_id [String] the ID of the table to insert rows into.
# @param view [String, nil] the table metadata view to request.
# @return [Table::AsyncInserter] the inserter object.
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
                 interval: 10, threads: 4, view: nil, &block
  ensure_service!
  # Fetch the table directly; Dataset#table would swallow NotFoundError.
  table_gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: view
  table = Table.from_gapi table_gapi, service, metadata_view: view
  # Delegate inserter construction to the table.
  table.insert_async skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
                     max_bytes: max_bytes, max_rows: max_rows,
                     interval: interval, threads: threads, &block
end

def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil

##
# Streams rows into the table via the tabledata.insertAll API.
#
# @raise [ArgumentError] when no rows are given.
# @return [Google::Cloud::Bigquery::InsertResponse] the insert response.
def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
  rows = [rows] if rows.is_a? Hash
  raise ArgumentError, "No rows provided" if rows.empty?
  ensure_service!
  gapi = service.insert_tabledata dataset_id, table_id, rows,
                                  skip_invalid: skip_invalid,
                                  ignore_unknown: ignore_unknown,
                                  insert_ids: insert_ids,
                                  project_id: project_id
  InsertResponse.from_gapi rows, gapi
end

def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil

##
# Streams rows, creating the destination table when the insert fails with
# not-found (used by #insert when `autocreate` is set). On NotFoundError:
# sleep a random 1-60s (presumably jitter to spread concurrent callers —
# TODO confirm), create the table (yielding the updater to the caller's
# block, ignoring AlreadyExistsError from a racing creator), sleep 60s
# before retrying the insert. NOTE(review): the retry is unbounded if the
# table never becomes visible — confirm this is intended.
def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
  insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
rescue Google::Cloud::NotFoundError
  sleep rand(1..60)
  begin
    create_table table_id do |tbl_updater|
      yield tbl_updater if block_given?
    end
  rescue Google::Cloud::AlreadyExistsError
    # Do nothing if it already exists
  end
  sleep 60
  retry
end

def labels

Returns:
  • (Hash, nil) - A hash containing key/value pairs,
##
# The user-provided labels of the dataset.
#
# @return [Hash, nil] a frozen copy of the labels hash, or `nil` when the
#   object is a reference.
def labels
  return nil if reference?
  labels_map = @gapi.labels
  labels_map = labels_map.to_h if labels_map.respond_to? :to_h
  labels_map.dup.freeze
end

def labels= labels

Parameters:
  • labels (Hash) -- A hash containing key/value
##
# Updates the user-provided labels of the dataset.
#
# @param labels [Hash] a hash of key/value label pairs.
def labels= labels
  # ensure_full_data! performs `reload! unless resource_full?`.
  ensure_full_data!
  @gapi.labels = labels
  patch_gapi! :labels
end

def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,

Other tags:
    Example: Schema is not required with a Cloud Datastore backup: -
    Example: Upload a file directly: -
    Example: Pass a list of google-cloud-storage files: -
    Example: Pass a google-cloud-storage `File` instance: -

Returns:
  • (Boolean) - Returns `true` if the load job was successful.

Other tags:
    Yieldparam: updater - An

Other tags:
    Yield: - A block for setting the schema of the destination

Parameters:
  • reservation (String) -- The reservation that job would use. User
  • preserve_ascii_control_characters (Boolean) -- When source_format
  • reference_file_schema_uri (String) -- The URI of the reference
  • time_zone (String) -- The time zone used when parsing timestamp
  • source_column_match (String) -- Controls the strategy used to
  • null_markers (Array) -- A list of strings represented as
  • timestamp_format (String) -- Format used to parse
  • time_format (String) -- Format used to parse TIME values.
  • datetime_format (String) -- Format used to parse DATETIME
  • date_format (String) -- Format used to parse DATE values.
  • session_id (string) -- Session ID in which the load job must run.
  • schema (Google::Cloud::Bigquery::Schema) -- The schema for the
  • skip_leading (Integer) -- The number of rows at the top of a CSV
  • quote (String) -- The value that is used to quote data sections in
  • null_marker (String) -- Specifies a string that represents a null
  • max_bad_records (Integer) -- The maximum number of bad records
  • ignore_unknown (Boolean) -- Indicates if BigQuery should allow
  • delimiter (String) -- Specifies the separator for fields in a CSV
  • encoding (String) -- The character encoding of the data. The
  • autodetect (Boolean) -- Indicates if BigQuery should
  • quoted_newlines (Boolean) -- Indicates if BigQuery should allow
  • jagged_rows (Boolean) -- Accept rows that are missing trailing
  • projection_fields (Array) -- If the `format` option is set
  • write (String) -- Specifies how to handle data already present in
  • create (String) -- Specifies whether the job is allowed to create
  • format (String) -- The exported file format. The default value is
  • files (File, Google::Cloud::Storage::File, String, URI, Array) -- The file or files to load into the table.
  • table_id (String) -- The destination table to load the data into.
##
# Loads data into the given destination table with a load job, blocking
# until the job completes. Thin synchronous wrapper over #load_job: it
# forwards every option unchanged, waits for the job, and raises (via
# ensure_job_succeeded!) when the job failed.
#
# @param table_id [String] the destination table to load the data into.
# @yield [updater] a block for setting the schema and other load options.
# @return [Boolean] `true` if the load job was successful.
def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
         quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
         quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil,
         date_format: nil, datetime_format: nil, time_format: nil, timestamp_format: nil,
         null_markers: nil, source_column_match: nil, time_zone: nil, reference_file_schema_uri: nil,
         preserve_ascii_control_characters: nil,
         reservation: nil, &block
  job = load_job table_id, files,
                 format: format, create: create, write: write, projection_fields: projection_fields,
                 jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                 delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
                 quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
                 null_marker: null_marker, session_id: session_id, date_format: date_format,
                 datetime_format: datetime_format, time_format: time_format, timestamp_format: timestamp_format,
                 null_markers: null_markers, source_column_match: source_column_match, time_zone: time_zone,
                 reference_file_schema_uri: reference_file_schema_uri,
                 preserve_ascii_control_characters: preserve_ascii_control_characters,
                 reservation: reservation, &block
  job.wait_until_done!
  ensure_job_succeeded! job
  true
end

def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,

Other tags:
    Example: Schema is not required with a Cloud Datastore backup: -
    Example: Upload a file directly: -
    Example: Pass a list of google-cloud-storage files: -
    Example: Pass a google-cloud-storage `File` instance: -

Returns:
  • (Google::Cloud::Bigquery::LoadJob) - A new load job object.

Other tags:
    Yieldparam: updater - An

Other tags:
    Yield: - A block for setting the schema and other

Parameters:
  • reservation (String) -- The reservation that job would use. User
  • preserve_ascii_control_characters (Boolean) -- When source_format
  • reference_file_schema_uri (String) -- The URI of the reference
  • time_zone (String) -- The time zone used when parsing timestamp
  • source_column_match (String) -- Controls the strategy used to
  • null_markers (Array) -- A list of strings represented as
  • timestamp_format (String) -- Format used to parse
  • time_format (String) -- Format used to parse TIME values.
  • datetime_format (String) -- Format used to parse DATETIME
  • date_format (String) -- Format used to parse DATE values.
  • session_id (string) -- Session ID in which the load job must run.
  • create_session (Boolean) -- If set to true a new session will be created
  • dryrun (Boolean) -- If set, don't actually run this job. Behavior
  • labels (Hash) -- A hash of user-provided labels associated with
  • prefix (String) -- A string, usually human-readable, that will be
  • job_id (String) -- A user-defined ID for the load job. The ID
  • schema (Google::Cloud::Bigquery::Schema) -- The schema for the
  • skip_leading (Integer) -- The number of rows at the top of a CSV
  • quote (String) -- The value that is used to quote data sections in
  • null_marker (String) -- Specifies a string that represents a null
  • max_bad_records (Integer) -- The maximum number of bad records
  • ignore_unknown (Boolean) -- Indicates if BigQuery should allow
  • delimiter (String) -- Specifies the separator for fields in a CSV
  • encoding (String) -- The character encoding of the data. The
  • autodetect (Boolean) -- Indicates if BigQuery should
  • quoted_newlines (Boolean) -- Indicates if BigQuery should allow
  • jagged_rows (Boolean) -- Accept rows that are missing trailing
  • projection_fields (Array) -- If the `format` option is set
  • write (String) -- Specifies how to handle data already present in
  • create (String) -- Specifies whether the job is allowed to create
  • format (String) -- The exported file format. The default value is
  • files (File, Google::Cloud::Storage::File, String, URI, Array) -- The file or files to load into the table.
  • table_id (String) -- The destination table to load the data into.
##
# Starts (but does not wait for) a load job that loads data into the
# given destination table. Builds a LoadJob::Updater via
# load_job_updater with every option forwarded unchanged, yields it to
# the caller's block, then submits local files or URIs through
# load_local_or_uri.
#
# @param table_id [String] the destination table to load the data into.
# @yield [updater] a block for setting the schema and other job options.
# @return [Google::Cloud::Bigquery::LoadJob] the new load job object.
def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
             quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
             quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
             null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, date_format: nil,
             datetime_format: nil, time_format: nil, timestamp_format: nil, null_markers: nil,
             source_column_match: nil, time_zone: nil, reference_file_schema_uri: nil,
             preserve_ascii_control_characters: nil, reservation: nil
  ensure_service!
  updater = load_job_updater table_id,
                             format: format, create: create, write: write, projection_fields: projection_fields,
                             jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                             delimiter: delimiter, ignore_unknown: ignore_unknown,
                             max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
                             dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
                             autodetect: autodetect, null_marker: null_marker, create_session: create_session,
                             session_id: session_id, date_format: date_format, datetime_format: datetime_format,
                             time_format: time_format, timestamp_format: timestamp_format,
                             null_markers: null_markers, source_column_match: source_column_match,
                             time_zone: time_zone, reference_file_schema_uri: reference_file_schema_uri,
                             preserve_ascii_control_characters: preserve_ascii_control_characters,
                             reservation: reservation
  yield updater if block_given?
  load_local_or_uri files, updater
end

def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,

##
# @private Applies the CSV-specific options to the load job updater,
# assigning only the options the caller actually supplied (non-nil).
def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
                          skip_leading: nil, null_marker: nil, null_markers: nil, source_column_match: nil,
                          preserve_ascii_control_characters: nil
  csv_options = {
    jagged_rows: jagged_rows,
    quoted_newlines: quoted_newlines,
    delimiter: delimiter,
    null_marker: null_marker,
    quote: quote,
    skip_leading: skip_leading,
    null_markers: null_markers,
    source_column_match: source_column_match,
    preserve_ascii_control_characters: preserve_ascii_control_characters
  }
  csv_options.each do |option, value|
    job.public_send "#{option}=", value unless value.nil?
  end
end

def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,

##
# @private Applies the file-format options to the load job updater
# (assigning only non-nil values), then delegates the CSV-specific
# options to #load_job_csv_options!.
def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
                           encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
                           skip_leading: nil, null_marker: nil, date_format: nil, datetime_format: nil,
                           time_format: nil, timestamp_format: nil, null_markers: nil, source_column_match: nil,
                           time_zone: nil, reference_file_schema_uri: nil,
                           preserve_ascii_control_characters: nil
  general_options = {
    format: format,
    projection_fields: projection_fields,
    encoding: encoding,
    ignore_unknown: ignore_unknown,
    max_bad_records: max_bad_records,
    date_format: date_format,
    datetime_format: datetime_format,
    time_format: time_format,
    timestamp_format: timestamp_format,
    time_zone: time_zone,
    reference_file_schema_uri: reference_file_schema_uri
  }
  general_options.each do |option, value|
    job.public_send "#{option}=", value unless value.nil?
  end
  load_job_csv_options! job, jagged_rows: jagged_rows,
                             quoted_newlines: quoted_newlines,
                             delimiter: delimiter,
                             quote: quote,
                             skip_leading: skip_leading,
                             null_marker: null_marker,
                             null_markers: null_markers,
                             source_column_match: source_column_match,
                             preserve_ascii_control_characters: preserve_ascii_control_characters
end

def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil, reservation: nil

##
# @private Builds the bare Google::Apis::BigqueryV2::Job for a load job
# targeting the given table in this dataset.
def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil, reservation: nil
  destination = Google::Apis::BigqueryV2::TableReference.new(
    project_id: project_id,
    dataset_id: dataset_id,
    table_id: table_id
  )
  load_config = Google::Apis::BigqueryV2::JobConfigurationLoad.new destination_table: destination
  Google::Apis::BigqueryV2::Job.new(
    job_reference: service.job_ref_from(job_id, prefix),
    configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
      load: load_config,
      dry_run: dryrun,
      reservation: reservation
    )
  )
end

def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,

# Builds a LoadJob::Updater for loading data into the given table.
# Every option left as nil is skipped so the service default applies;
# explicitly supplied options are copied onto the updater.
def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                     quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
                     max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
                     prefix: nil, labels: nil, autodetect: nil, null_marker: nil, create_session: nil,
                     session_id: nil, date_format: nil, datetime_format: nil, time_format: nil,
                     timestamp_format: nil, null_markers: nil, source_column_match: nil, time_zone: nil,
                     reference_file_schema_uri: nil, preserve_ascii_control_characters: nil, reservation: nil
  gapi_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix, reservation: reservation
  updater = LoadJob::Updater.new gapi_job
  updater.location = location if location # may be dataset reference
  # Assign only the job-level options the caller actually supplied.
  { create: create, write: write, schema: schema, autodetect: autodetect, labels: labels,
    create_session: create_session, session_id: session_id }.each do |option, value|
    updater.public_send :"#{option}=", value unless value.nil?
  end
  load_job_file_options! updater, format:            format,
                                  projection_fields: projection_fields,
                                  jagged_rows:       jagged_rows,
                                  quoted_newlines:   quoted_newlines,
                                  encoding:          encoding,
                                  delimiter:         delimiter,
                                  ignore_unknown:    ignore_unknown,
                                  max_bad_records:   max_bad_records,
                                  quote:             quote,
                                  skip_leading:      skip_leading,
                                  null_marker:       null_marker,
                                  date_format:       date_format,
                                  datetime_format:   datetime_format,
                                  time_format:       time_format,
                                  timestamp_format:  timestamp_format,
                                  null_markers:      null_markers,
                                  source_column_match: source_column_match,
                                  time_zone:         time_zone,
                                  reference_file_schema_uri: reference_file_schema_uri,
                                  preserve_ascii_control_characters: preserve_ascii_control_characters
  updater
end

def load_local file, job_gapi

# Starts a load job that uploads a local file. When the job
# configuration does not already name a source format, one is derived
# from the file path (when derivable).
def load_local file, job_gapi
  path = Pathname(file).to_path
  load_config = job_gapi.configuration.load
  if load_config.source_format.nil?
    derived_format = Convert.derive_source_format path
    load_config.source_format = derived_format unless derived_format.nil?
  end
  Job.from_gapi service.load_table_file(file, job_gapi), service
end

def load_local_or_uri file, updater

# Dispatches a load to either the local-file upload path or the
# Cloud Storage path, depending on what +file+ refers to.
def load_local_or_uri file, updater
  gapi = updater.to_gapi
  local_file?(file) ? load_local(file, gapi) : load_storage(file, gapi)
end

def load_storage urls, job_gapi

# Starts a load job reading from one or more Google Cloud Storage
# URLs. Accepts a single URL or an array; each element may be a string,
# a URI, or any object responding to #to_gs_url.
def load_storage urls, job_gapi
  # Normalize every input to a storage URL string.
  urls = [urls].flatten.map do |url|
    if url.respond_to? :to_gs_url
      url.to_gs_url
    elsif url.is_a? URI
      url.to_s
    else
      url
    end
  end
  # NOTE: the previous `unless urls.nil?` guard was dead code — `urls`
  # is the result of Array#map and can never be nil.
  job_gapi.configuration.load.update! source_uris: urls
  if job_gapi.configuration.load.source_format.nil?
    source_format = Convert.derive_source_format_from_list urls
    job_gapi.configuration.load.source_format = source_format unless source_format.nil?
  end
  gapi = service.load_table_gs_url job_gapi
  Job.from_gapi gapi, service
end

def local_file? file

# Reports whether +file+ names an existing regular file on the local
# filesystem. Any error raised during the check (e.g. a non-path
# argument) is treated as "not a local file".
def local_file? file
  begin
    ::File.file? file
  rescue StandardError
    false
  end
end

def location

Returns:
  • (String, nil) - The geographic location, or `nil` if the object is a reference (see {#reference?}).
# The geographic location of the dataset, or nil when this object is
# only a local reference (no resource loaded from the service).
def location
  reference? ? nil : @gapi.location
end

def model model_id, skip_lookup: nil

Other tags:
    Example: Avoid retrieving the model resource with `skip_lookup`: -

Returns:
  • (Google::Cloud::Bigquery::Model, nil) - Returns `nil` if the

Parameters:
  • skip_lookup (Boolean) -- Optionally create just a local reference
  • model_id (String) -- The ID of a model.
# Retrieves a model by ID. With skip_lookup, returns a local reference
# without contacting the service. Returns nil when the model does not
# exist.
def model model_id, skip_lookup: nil
  ensure_service!
  if skip_lookup
    Model.new_reference project_id, dataset_id, model_id, service
  else
    Model.from_gapi_json service.get_model(dataset_id, model_id), service
  end
rescue Google::Cloud::NotFoundError
  nil
end

def models token: nil, max: nil

Other tags:
    Example: Retrieve all models: (See {Model::List#all}) -

Returns:
  • (Array) - An array of models

Parameters:
  • max (Integer) -- Maximum number of models to return.
  • token (String) -- A previously-returned page token representing
# Lists the models in the dataset, one page at a time.
def models token: nil, max: nil
  ensure_service!
  list_gapi = service.list_models dataset_id, token: token, max: max
  Model::List.from_gapi list_gapi, service, dataset_id, max
end

def modified_at

Returns:
  • (Time, nil) - The last modified time, or `nil` if not present or
# The time the dataset was last modified, or nil for a local reference
# object or when the service did not return the value.
def modified_at
  return nil if reference?
  ensure_full_data!
  # last_modified_time is epoch milliseconds; convert to a Time.
  Convert.millis_to_time @gapi.last_modified_time
end

def name

Returns:
  • (String, nil) - The friendly name, or `nil` if the object is a reference (see {#reference?}).
# The friendly (display) name of the dataset, or nil when this object
# is only a local reference.
def name
  reference? ? nil : @gapi.friendly_name
end

def name= new_name

Parameters:
  • new_name (String) -- The new friendly name, or `nil` if the object
# Updates the friendly (display) name of the dataset.
def name= new_name
  # Load the full resource first so the PATCH carries current values.
  reload! unless resource_full?
  @gapi.update! friendly_name: new_name
  patch_gapi! :friendly_name
end

def patch_gapi! *attributes

# Sends a PATCH for the given attributes, reading their current values
# from @gapi. Selects an update_mode hint for the service depending on
# whether :access is among the patched attributes: ACL-only, metadata-
# only, or a full update. The etag is passed for optimistic locking.
def patch_gapi! *attributes
  return if attributes.empty?
  ensure_service!
  patch_args = attributes.to_h { |attr| [attr, @gapi.send(attr)] }
  has_access_key = patch_args.key? :access
  other_keys_exist = (patch_args.keys - [:access]).any?
  update_mode = if has_access_key && other_keys_exist
                  UpdateMode::FULL
                elsif has_access_key
                  UpdateMode::UPDATE_ACL
                elsif other_keys_exist
                  UpdateMode::UPDATE_METADATA
                end
  patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
  patch_gapi.etag = etag if etag
  # FIX: the keyword was previously garbled across a line break
  # ("ode: update_mode"); it must be update_mode:.
  @gapi = service.patch_dataset dataset_id, patch_gapi,
                                access_policy_version: @access_policy_version,
                                update_mode: update_mode
end

def project_id

Returns:
  • (String) - The project ID.
# The ID of the project containing this dataset, available on both
# reference objects and loaded resources.
def project_id
  (reference? ? reference : @gapi.dataset_reference).project_id
end

def query query,

Other tags:
    Example: Query using external data source, set destination: -
    Example: Run query in a session: -
    Example: Execute a DML statement: -
    Example: Execute a DDL statement: -
    Example: Query using named query parameters with types: -
    Example: Query using named query parameters: -
    Example: Query using positional query parameters: -
    Example: Query using legacy SQL: -
    Example: Query using standard SQL: -

Returns:
  • (Google::Cloud::Bigquery::Data) - A new data object.

Other tags:
    Yieldparam: job - a job

Other tags:
    Yield: - a job configuration object

Parameters:
  • reservation (String) -- The reservation that job would use. User
  • format_options_use_int64_timestamp (Boolean) -- Output timestamp
  • session_id (String) -- The ID of an existing session. See the
  • legacy_sql (Boolean) -- Specifies whether to use BigQuery's
  • standard_sql (Boolean) -- Specifies whether to use BigQuery's
  • cache (Boolean) -- Whether to look for the result in the query
  • max (Integer) -- The maximum number of rows of data to return per
  • external (Hash) -- A Hash
  • types (Array, Hash) -- Standard SQL only. Types of the SQL parameters in `params`. It is not always
  • params (Array, Hash) -- Standard SQL only. Used to pass query arguments when the `query` string contains
  • query (String) -- A query string, following the BigQuery [query

Other tags:
    See: https://cloud.google.com/bigquery/querying-data - Querying Data
# Runs the query synchronously: starts a query job, blocks until it
# finishes, raises if it failed, and returns the first page of results.
def query query,
          params: nil,
          types: nil,
          external: nil,
          max: nil,
          cache: true,
          standard_sql: nil,
          legacy_sql: nil,
          session_id: nil,
          format_options_use_int64_timestamp: true,
          reservation: nil,
          &block
  job_opts = {
    params: params, types: types, external: external, cache: cache,
    standard_sql: standard_sql, legacy_sql: legacy_sql,
    session_id: session_id, reservation: reservation
  }
  job = query_job query, **job_opts, &block
  job.wait_until_done!
  ensure_job_succeeded! job
  job.data max: max, format_options_use_int64_timestamp: format_options_use_int64_timestamp
end

def query_job query,

Other tags:
    Example: Query using external data source, set destination: -
    Example: Run query in a session: -
    Example: Execute a DML statement: -
    Example: Execute a DDL statement: -
    Example: Query using named query parameters with types: -
    Example: Query using named query parameters: -
    Example: Query using positional query parameters: -
    Example: Query using legacy SQL: -
    Example: Query using standard SQL: -

Returns:
  • (Google::Cloud::Bigquery::QueryJob) - A new query job object.

Other tags:
    Yieldparam: job - a job

Other tags:
    Yield: - a job configuration object

Parameters:
  • reservation (String) -- The reservation that job would use. User
  • session_id (String) -- The ID of an existing session. See also the
  • create_session (Boolean) -- If true, creates a new session, where the
  • udfs (Array, String) -- User-defined function resources
  • labels (Hash) -- A hash of user-provided labels associated with
  • prefix (String) -- A string, usually human-readable, that will be
  • job_id (String) -- A user-defined ID for the query job. The ID
  • maximum_bytes_billed (Integer) -- Limits the bytes billed for this
  • maximum_billing_tier (Integer) -- Limits the billing tier for this
  • flatten (Boolean) -- This option is specific to Legacy SQL.
  • large_results (Boolean) -- This option is specific to Legacy SQL.
  • legacy_sql (Boolean) -- Specifies whether to use BigQuery's
  • standard_sql (Boolean) -- Specifies whether to use BigQuery's
  • dryrun (Boolean) -- If set to true, BigQuery doesn't run the job.
  • write (String) -- Specifies the action that occurs if the
  • create (String) -- Specifies whether the job is allowed to create
  • table (Table) -- The destination table where the query results
  • cache (Boolean) -- Whether to look for the result in the query
  • priority (String) -- Specifies a priority for the query. Possible
  • external (Hash) -- A Hash
  • types (Array, Hash) -- Standard SQL only. Types of the SQL parameters in `params`. It is not always
  • params (Array, Hash) -- Standard SQL only. Used to pass query arguments when the `query` string contains
  • query (String) -- A query string, following the BigQuery [query
# Builds and starts an asynchronous query job for this dataset,
# forwarding every caller-supplied option to the QueryJob updater.
# Yields the updater for further configuration when a block is given.
def query_job query,
              params: nil,
              types: nil,
              external: nil,
              priority: "INTERACTIVE",
              cache: true,
              table: nil,
              create: nil,
              write: nil,
              dryrun: nil,
              standard_sql: nil,
              legacy_sql: nil,
              large_results: nil,
              flatten: nil,
              maximum_billing_tier: nil,
              maximum_bytes_billed: nil,
              job_id: nil,
              prefix: nil,
              labels: nil,
              udfs: nil,
              create_session: nil,
              session_id: nil,
              reservation: nil
  ensure_service!
  # Collect every option for the updater factory.
  query_options = {
    params: params, types: types, external: external, priority: priority,
    cache: cache, table: table, create: create, write: write,
    dryrun: dryrun, standard_sql: standard_sql, legacy_sql: legacy_sql,
    large_results: large_results, flatten: flatten,
    maximum_billing_tier: maximum_billing_tier,
    maximum_bytes_billed: maximum_bytes_billed,
    job_id: job_id, prefix: prefix, labels: labels, udfs: udfs,
    create_session: create_session, session_id: session_id,
    reservation: reservation
  }
  job_updater = QueryJob::Updater.from_options service, query, query_options
  job_updater.dataset = self
  job_updater.location = location if location # may be dataset reference
  yield job_updater if block_given?
  Job.from_gapi service.query_job(job_updater.to_gapi), service
end

def reference?

Returns:
  • (Boolean) - `true` when the dataset is just a local reference
# Whether this dataset is only a local reference (no resource has been
# loaded from the service), e.g. one created with new_reference.
def reference?
  @gapi.nil?
end

def reload!

Other tags:
    Example: Skip retrieving the dataset from the service, then load it: -

Returns:
  • (Google::Cloud::Bigquery::Dataset) - Returns the reloaded
# Fetches the full dataset resource from the service, replacing any
# reference or partial representation, and returns self.
def reload!
  ensure_service!
  @gapi = service.get_project_dataset project_id, dataset_id, access_policy_version: @access_policy_version,
    dataset_view: @dataset_view
  # Clear the lazy-reference and cached existence state only after the
  # fetch succeeds.
  @reference = nil
  @exists = nil
  self
end

def resource?

Returns:
  • (Boolean) - `true` when the dataset was created with a resource
# Whether this dataset carries a resource representation fetched from
# the service (full or partial) — the inverse of reference?.
def resource?
  !@gapi.nil?
end

def resource_full?

Returns:
  • (Boolean) - `true` when the dataset was created with a full
# Whether @gapi is a full Dataset resource, as opposed to a partial
# list item or a bare reference.
def resource_full?
  @gapi.is_a? Google::Apis::BigqueryV2::Dataset
end

def resource_partial?

Returns:
  • (Boolean) - `true` when the dataset was created with a partial
# Whether @gapi is a partial resource from a dataset-list response;
# such objects must be reloaded before full metadata is available.
def resource_partial?
  @gapi.is_a? Google::Apis::BigqueryV2::DatasetList::Dataset
end

def routine routine_id, skip_lookup: nil

Other tags:
    Example: Avoid retrieving the routine resource with `skip_lookup`: -

Returns:
  • (Google::Cloud::Bigquery::Routine, nil) - Returns `nil` if the

Parameters:
  • skip_lookup (Boolean) -- Optionally create just a local reference
  • routine_id (String) -- The ID of a routine.
# Retrieves a routine by ID. With skip_lookup, returns a local
# reference without contacting the service. Returns nil when the
# routine does not exist.
def routine routine_id, skip_lookup: nil
  ensure_service!
  if skip_lookup
    Routine.new_reference project_id, dataset_id, routine_id, service
  else
    Routine.from_gapi service.get_routine(dataset_id, routine_id), service
  end
rescue Google::Cloud::NotFoundError
  nil
end

def routines token: nil, max: nil, filter: nil

Other tags:
    Example: Retrieve all routines: (See {Routine::List#all}) -

Returns:
  • (Array) - An array of routines

Parameters:
  • filter (String) -- If set, then only the routines matching this filter are returned. The current supported
  • max (Integer) -- Maximum number of routines to return.
  • token (String) -- A previously-returned page token representing
# Lists the routines in the dataset, one page at a time, optionally
# restricted by a service-side filter.
def routines token: nil, max: nil, filter: nil
  ensure_service!
  list_gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
  Routine::List.from_gapi list_gapi, service, dataset_id, max, filter: filter
end

def storage_billing_model

Returns:
  • (String, nil) - A string containing the storage billing model, or `nil`.

Other tags:
    See: https://cloud.google.com/blog/products/data-analytics/new-bigquery-billing-model-helps-reduce-physical-storage-costs -
# The storage billing model string for the dataset, or nil for a local
# reference object. Forces a full resource load before reading.
def storage_billing_model
  return nil if reference?
  ensure_full_data!
  @gapi.storage_billing_model
end

def storage_billing_model= value

Parameters:
  • value (String) -- The new storage billing model. Accepted values

Other tags:
    See: https://cloud.google.com/blog/products/data-analytics/new-bigquery-billing-model-helps-reduce-physical-storage-costs -
# Updates the storage billing model of the dataset.
def storage_billing_model= value
  # Load the full resource first so the PATCH carries current values.
  ensure_full_data!
  @gapi.storage_billing_model = value
  patch_gapi! :storage_billing_model
end

def storage_url? files

# Reports whether every element of +files+ (a single value or an
# array) looks like a Google Cloud Storage location: either an object
# responding to #to_gs_url, or a string/URI beginning with "gs://".
def storage_url? files
  [files].flatten.all? do |candidate|
    next true if candidate.respond_to? :to_gs_url
    str = if candidate.respond_to? :to_str
            candidate.to_str
          elsif candidate.is_a? URI
            candidate.to_s
          end
    !str.nil? && str.downcase.start_with?("gs://")
  end
end

def table table_id, skip_lookup: nil, view: nil

Other tags:
    Example: Avoid retrieving transient stats of the table with `view`: -
    Example: Avoid retrieving the table resource with `skip_lookup`: -

Returns:
  • (Google::Cloud::Bigquery::Table, nil) - Returns `nil` if the

Parameters:
  • view (String) -- Specifies the view that determines which table information is returned.
  • skip_lookup (Boolean) -- Optionally create just a local reference
  • table_id (String) -- The ID of a table.
# Retrieves a table by ID. With skip_lookup, returns a local reference
# without contacting the service; +view+ controls how much metadata the
# service returns. Returns nil when the table does not exist.
def table table_id, skip_lookup: nil, view: nil
  ensure_service!
  if skip_lookup
    Table.new_reference project_id, dataset_id, table_id, service
  else
    table_gapi = service.get_project_table project_id, dataset_id, table_id, metadata_view: view
    Table.from_gapi table_gapi, service, metadata_view: view
  end
rescue Google::Cloud::NotFoundError
  nil
end

def tables token: nil, max: nil

Other tags:
    Example: Retrieve all tables: (See {Table::List#all}) -

Returns:
  • (Array) - An array of tables

Parameters:
  • max (Integer) -- Maximum number of tables to return.
  • token (String) -- A previously-returned page token representing
# Lists the tables in the dataset, one page at a time.
def tables token: nil, max: nil
  ensure_service!
  list_gapi = service.list_tables dataset_id, token: token, max: max
  Table::List.from_gapi list_gapi, service, dataset_id, max
end

def tags

Returns:
  • (Google::Cloud::Bigquery::Dataset::Tag) - The list of tags.

Other tags:
    See: https://cloud.google.com/resource-manager/docs/tags/tags-overview -
# The tags attached to the dataset as Tag objects, or nil when the
# resource carries no tags. Forces a full resource load first.
def tags
  ensure_full_data!
  raw_tags = @gapi.tags
  return nil if raw_tags.nil?
  raw_tags.map { |tag_gapi| Tag.from_gapi tag_gapi }
end

def udfs_gapi array_or_str

# Converts user-defined function sources (a single value or an array;
# each element is either inline code or a "gs://" URI string) into API
# UserDefinedFunctionResource objects. Returns [] for nil input.
def udfs_gapi array_or_str
  # Array(nil) is already [], so the former explicit nil guard was
  # redundant and has been removed.
  Array(array_or_str).map do |uri_or_code|
    resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
    if uri_or_code.start_with? "gs://"
      resource.resource_uri = uri_or_code
    else
      resource.inline_code = uri_or_code
    end
    resource
  end
end