##
# @private Represents the Bigquery service and API calls.
class Google::Cloud::Bigquery::Service

##
# @param [Hash] dataset_hash Hash for a DatasetAccessEntry.
def self.dataset_access_entry_from_hash dataset_hash
  params = {
    dataset: Google::Apis::BigqueryV2::DatasetReference.new(**dataset_hash),
    target_types: dataset_hash[:target_types]
  }.compact
  Google::Apis::BigqueryV2::DatasetAccessEntry.new(**params)
end

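# A hedged sketch of calling the helper above; the hash keys mirror
# DatasetReference, and "my-project"/"my_dataset" are made-up values:
#
#   entry = Google::Cloud::Bigquery::Service.dataset_access_entry_from_hash(
#     { project_id: "my-project", dataset_id: "my_dataset" }
#   )
#   entry.dataset.dataset_id #=> "my_dataset"
#   entry.target_types       #=> nil (dropped by .compact when absent)
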
def self.get_table_ref table, default_ref: nil
  if table.respond_to? :table_ref
    table.table_ref
  else
    table_ref_from_s table, default_ref: default_ref
  end
end

##
# Extracts at least `tbl` group, and possibly `dts` and `prj` groups,
# from strings in the formats: "my_table", "my_dataset.my_table", or
# "my-project:my_dataset.my_table". Then merges project_id and
# dataset_id from the default table ref if they are missing.
#
# The regex matches both Standard SQL
# ("bigquery-public-data.samples.shakespeare") and Legacy SQL
# ("bigquery-public-data:samples.shakespeare") formats.
def self.table_ref_from_s str, default_ref: {}
  str = str.to_s
  m = /\A(((?<prj>\S*)(:|\.))?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
  raise ArgumentError, "unable to identify table from #{str.inspect}" unless m
  str_table_ref_hash = {
    project_id: m["prj"],
    dataset_id: m["dts"],
    table_id:   m["tbl"]
  }.compact
  str_table_ref_hash = default_ref.to_h.merge str_table_ref_hash
  ref = Google::Apis::BigqueryV2::TableReference.new(**str_table_ref_hash)
  validate_table_ref ref
  ref
end

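# Illustrative parses of the formats named in the comment above (the
# #=> values show what the regex groups yield):
#
#   ref = Google::Cloud::Bigquery::Service.table_ref_from_s(
#     "bigquery-public-data:samples.shakespeare"
#   )
#   ref.project_id #=> "bigquery-public-data"
#   ref.dataset_id #=> "samples"
#   ref.table_id   #=> "shakespeare"
#
#   # A bare "my_table" relies on default_ref for project_id and dataset_id.
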
def self.validate_table_ref table_ref
  [:project_id, :dataset_id, :table_id].each do |f|
    raise ArgumentError, "TableReference is missing #{f}" if table_ref.send(f).nil?
  end
end

def cancel_job job_id, location: nil
  # The BigQuery team has told us cancelling is considered idempotent
  execute backoff: true do
    service.cancel_job @project, job_id, location: location
  end
end

def copy_table copy_job_gapi
  execute backoff: true do
    service.insert_job @project, copy_job_gapi
  end
end

def dataset_ref_from dts, pjt = nil
  return nil if dts.nil?
  if dts.respond_to? :dataset_id
    pjt ||= dts.project_id || @project
    Google::Apis::BigqueryV2::DatasetReference.new(
      project_id: pjt,
      dataset_id: dts.dataset_id
    )
  else
    pjt ||= @project
    Google::Apis::BigqueryV2::DatasetReference.new(
      project_id: pjt,
      dataset_id: dts
    )
  end
end

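# A sketch of the two branches above (assuming @project is "my-project";
# `dataset` stands for any object responding to #dataset_id):
#
#   dataset_ref_from "my_dataset"
#   #=> DatasetReference(project_id: "my-project", dataset_id: "my_dataset")
#
#   dataset_ref_from dataset, "other-project"
#   #=> DatasetReference(project_id: "other-project", dataset_id: dataset.dataset_id)
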
##
# Deletes the dataset specified by the datasetId value.
# Before you can delete a dataset, you must delete all its tables,
# either manually or by specifying force: true in options.
# Immediately after deletion, you can create another dataset with
# the same name.
def delete_dataset dataset_id, force = nil
  execute do
    service.delete_dataset @project, dataset_id, delete_contents: force
  end
end

def delete_job job_id, location: nil
  execute do
    service.delete_job @project, job_id, location: location
  end
end

##
# Deletes the model specified by modelId from the dataset.
def delete_model dataset_id, model_id
  execute { service.delete_model @project, dataset_id, model_id }
end

def delete_routine dataset_id, routine_id
  execute { service.delete_routine @project, dataset_id, routine_id }
end

##
# Deletes the table specified by tableId from the dataset.
def delete_table dataset_id, table_id
  execute { service.delete_table @project, dataset_id, table_id }
end

def execute backoff: nil, &block
  if backoff
    Backoff.new(retries: retries).execute(&block)
  else
    yield
  end
rescue Google::Apis::Error => e
  raise Google::Cloud::Error.from_error e
end

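# Typical call pattern inside this class (a sketch; Backoff is the gem's
# retry helper and is only engaged when backoff: true is passed):
#
#   execute backoff: true do
#     service.get_dataset @project, "my_dataset"
#   end
#   # Any Google::Apis::Error raised by the block is converted to a
#   # Google::Cloud::Error by the rescue clause above.
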
def extract_table extract_job_gapi
  execute backoff: true do
    service.insert_job @project, extract_job_gapi
  end
end

def generate_id
  SecureRandom.urlsafe_base64 21
end

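# SecureRandom.urlsafe_base64 21 encodes 21 random bytes as 28 URL-safe
# characters (the value below is illustrative, not deterministic):
#
#   generate_id #=> "9Rb21t5ShjY0BTxUVBg2dnTJci8x"
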
def get_dataset dataset_id, access_policy_version: nil, dataset_view: nil
  get_project_dataset @project, dataset_id,
                      access_policy_version: access_policy_version,
                      view: dataset_view
end

def get_job job_id, location: nil
  # The get operation is considered idempotent
  execute backoff: true do
    service.get_job @project, job_id, location: location
  end
end

##
# Gets the specified model resource by model ID. This method does not
# return the data in the model, it only returns the model resource,
# which describes the structure of this model.
def get_model dataset_id, model_id
  get_project_model @project, dataset_id, model_id
end

def get_project_dataset project_id, dataset_id, access_policy_version: nil, dataset_view: nil
  # The get operation is considered idempotent
  execute backoff: true do
    service.get_dataset project_id, dataset_id,
                        access_policy_version: access_policy_version,
                        view: dataset_view
  end
end

def get_project_model project_id, dataset_id, model_id
  # The get operation is considered idempotent
  execute backoff: true do
    json_txt = service.get_model project_id, dataset_id, model_id,
                                 options: { skip_deserialization: true }
    JSON.parse json_txt, symbolize_names: true
  end
end

def get_project_table project_id, dataset_id, table_id, metadata_view: nil
  metadata_view = table_metadata_view_type_for metadata_view
  # The get operation is considered idempotent
  execute backoff: true do
    service.get_table project_id, dataset_id, table_id, view: metadata_view
  end
end

def get_routine dataset_id, routine_id
  # The get operation is considered idempotent
  execute backoff: true do
    service.get_routine @project, dataset_id, routine_id
  end
end

##
# Gets the specified table resource by table ID.
# This method does not return the data in the table,
# it only returns the table resource,
# which describes the structure of this table.
def get_table dataset_id, table_id, metadata_view: nil
  get_project_table @project, dataset_id, table_id, metadata_view: metadata_view
end

def get_table_policy dataset_id, table_id
  policy_options = API::GetPolicyOptions.new requested_policy_version: 1
  execute do
    service.get_table_iam_policy table_path(dataset_id, table_id),
                                 API::GetIamPolicyRequest.new(options: policy_options)
  end
end

def initialize project, credentials,
               retries: nil, timeout: nil, host: nil,
               quota_project: nil, universe_domain: nil
  @project = project
  @credentials = credentials
  @retries = retries
  @timeout = timeout
  @host = host
  @quota_project = quota_project
  @universe_domain = universe_domain
end

def insert_dataset new_dataset_gapi, access_policy_version: nil
  execute do
    service.insert_dataset @project, new_dataset_gapi,
                           access_policy_version: access_policy_version
  end
end

def insert_job config, location: nil
  job_object = API::Job.new job_reference: job_ref_from(nil, nil, location: location),
                            configuration: config
  # Jobs have generated id, so this operation is considered idempotent
  execute backoff: true do
    service.insert_job @project, job_object
  end
end

def insert_routine dataset_id, new_routine_gapi
  execute { service.insert_routine @project, dataset_id, new_routine_gapi }
end

def insert_table dataset_id, new_table_gapi
  execute { service.insert_table @project, dataset_id, new_table_gapi }
end

def insert_tabledata dataset_id, table_id, rows, insert_ids: nil,
                     ignore_unknown: nil, skip_invalid: nil, project_id: nil
  json_rows = Array(rows).map { |row| Convert.to_json_row row }
  insert_tabledata_json_rows dataset_id, table_id, json_rows,
                             insert_ids: insert_ids,
                             ignore_unknown: ignore_unknown,
                             skip_invalid: skip_invalid,
                             project_id: project_id
end

def insert_tabledata_json_rows dataset_id, table_id, json_rows, insert_ids: nil,
                               ignore_unknown: nil, skip_invalid: nil, project_id: nil
  rows_and_ids = Array(json_rows).zip Array(insert_ids)
  insert_rows = rows_and_ids.map do |json_row, insert_id|
    if insert_id == :skip
      { json: json_row }
    else
      insert_id ||= SecureRandom.uuid
      { insertId: insert_id, json: json_row }
    end
  end

  insert_req = {
    rows: insert_rows,
    ignoreUnknownValues: ignore_unknown,
    skipInvalidRows: skip_invalid
  }.to_json

  # The insertAll with insertId operation is considered idempotent
  project_id ||= @project
  execute backoff: true do
    service.insert_all_table_data(
      project_id, dataset_id, table_id, insert_req,
      options: { skip_serialization: true }
    )
  end
end

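# A hedged sketch of the per-row insert_ids contract implemented above
# ("sku42" and the rows are made up): :skip omits insertId entirely, nil
# gets a random UUID, and any other value is passed through verbatim:
#
#   insert_tabledata_json_rows "my_dataset", "my_table",
#                              [{ name: "a" }, { name: "b" }, { name: "c" }],
#                              insert_ids: ["sku42", :skip, nil]
#   # rows sent: [{ insertId: "sku42", json: { name: "a" } },
#   #             { json: { name: "b" } },
#   #             { insertId: "<random uuid>", json: { name: "c" } }]
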
def inspect
  "#{self.class}(#{@project})"
end

def job_query_results job_id, location: nil, max: nil, token: nil,
                      start: nil, timeout: nil, format_options_use_int64_timestamp: nil
  # The get operation is considered idempotent
  execute backoff: true do
    service.get_job_query_results @project, job_id,
                                  location: location,
                                  max_results: max,
                                  page_token: token,
                                  start_index: start,
                                  timeout_ms: timeout,
                                  format_options_use_int64_timestamp: format_options_use_int64_timestamp
  end
end

##
# If no job_id or prefix is given, always generate a client-side job ID
# anyway, for idempotent retry in the google-api-client layer.
def job_ref_from job_id, prefix, location: nil
  prefix ||= "job_"
  job_id ||= "#{prefix}#{generate_id}"
  job_ref = API::JobReference.new project_id: @project, job_id: job_id
  # BigQuery does not allow nil location, but missing is ok.
  job_ref.location = location if location
  job_ref
end

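# Sketch of both paths ("job_" is the default prefix; the generated id
# shown is illustrative):
#
#   job_ref_from nil, nil
#   #=> API::JobReference(project_id: @project, job_id: "job_<28 random chars>")
#
#   job_ref_from "my_job", nil, location: "EU"
#   #=> API::JobReference(project_id: @project, job_id: "my_job", location: "EU")
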
##
# Lists all datasets in the specified project to which you have
# been granted the READER dataset role.
def list_datasets all: nil, filter: nil, max: nil, token: nil
  # The list operation is considered idempotent
  execute backoff: true do
    service.list_datasets @project, all: all, filter: filter,
                          max_results: max, page_token: token
  end
end

##
# Lists all jobs in the specified project to which you have
# been granted the READER job role.
def list_jobs all: nil, token: nil, max: nil, filter: nil,
              min_created_at: nil, max_created_at: nil, parent_job_id: nil
  # The list operation is considered idempotent
  min_creation_time = Convert.time_to_millis min_created_at
  max_creation_time = Convert.time_to_millis max_created_at
  execute backoff: true do
    service.list_jobs @project, all_users: all, max_results: max,
                      page_token: token, projection: "full", state_filter: filter,
                      min_creation_time: min_creation_time,
                      max_creation_time: max_creation_time,
                      parent_job_id: parent_job_id
  end
end

##
# Lists all models in the specified dataset.
def list_models dataset_id, max: nil, token: nil
  options = { skip_deserialization: true }
  # The list operation is considered idempotent
  execute backoff: true do
    json_txt = service.list_models @project, dataset_id, max_results: max,
                                   page_token: token, options: options
    JSON.parse json_txt, symbolize_names: true
  end
end

def list_projects max: nil, token: nil
  execute backoff: true do
    service.list_projects max_results: max, page_token: token
  end
end

##
# Lists all routines in the specified dataset.
# Requires the READER dataset role.
# Unless readMask is set in the request, only the following fields are populated:
# etag, projectId, routineId, routineType, creationTime, lastModifiedTime, and language.
def list_routines dataset_id, max: nil, token: nil, filter: nil
  # The list operation is considered idempotent
  execute backoff: true do
    service.list_routines @project, dataset_id, max_results: max,
                          page_token: token, filter: filter
  end
end

def list_tabledata dataset_id, table_id, max: nil, token: nil, start: nil,
                   format_options_use_int64_timestamp: nil
  # The list operation is considered idempotent
  execute backoff: true do
    json_txt = service.list_table_data \
      @project, dataset_id, table_id,
      max_results: max,
      page_token: token,
      start_index: start,
      options: { skip_deserialization: true },
      format_options_use_int64_timestamp: format_options_use_int64_timestamp
    JSON.parse json_txt, symbolize_names: true
  end
end

##
# Lists all tables in the specified dataset.
def list_tables dataset_id, max: nil, token: nil
  # The list operation is considered idempotent
  execute backoff: true do
    service.list_tables @project, dataset_id, max_results: max, page_token: token
  end
end

def load_table_file file, load_job_gapi
  execute backoff: true do
    service.insert_job @project, load_job_gapi,
                       upload_source: file,
                       content_type: mime_type_for(file)
  end
end

def load_table_gs_url load_job_gapi
  execute backoff: true do
    service.insert_job @project, load_job_gapi
  end
end

def mime_type_for file
  mime_type = MiniMime.lookup_by_filename Pathname(file).to_path
  return nil if mime_type.nil?
  mime_type.content_type
rescue StandardError
  nil
end

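# Illustrative MiniMime lookups (filenames are made up):
#
#   mime_type_for "data.csv"    #=> "text/csv"
#   mime_type_for "schema.json" #=> "application/json"
#   mime_type_for "noext"       #=> nil
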
##
# Updates information in an existing dataset, only replacing
# fields that are provided in the submitted dataset resource.
def patch_dataset dataset_id, patched_dataset_gapi, access_policy_version: nil, update_mode: nil
  patch_with_backoff = false
  options = {}
  if patched_dataset_gapi.etag
    options[:header] = { "If-Match" => patched_dataset_gapi.etag }
    # The patch with etag operation is considered idempotent
    patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
    service.patch_dataset @project, dataset_id, patched_dataset_gapi,
                          options: options,
                          access_policy_version: access_policy_version,
                          update_mode: update_mode
  end
end

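# The If-Match header above makes the patch conditional on the etag: if
# the dataset changed since it was read, the server rejects the request
# rather than overwriting it, which is what makes the retry safe. A
# hedged caller sketch (the gapi object and etag value are made up):
#
#   patched = Google::Apis::BigqueryV2::Dataset.new(
#     friendly_name: "New name", etag: "BwWk0tR1ZLE="
#   )
#   patch_dataset "my_dataset", patched
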
##
# Updates information in an existing model, replacing fields that
# are provided in the submitted model resource.
def patch_model dataset_id, model_id, patched_model_gapi, etag = nil
  patch_with_backoff = false
  options = { skip_deserialization: true }
  if etag
    options[:header] = { "If-Match" => etag }
    # The patch with etag operation is considered idempotent
    patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
    json_txt = service.patch_model @project, dataset_id, model_id,
                                   patched_model_gapi, options: options
    JSON.parse json_txt, symbolize_names: true
  end
end

##
# Updates information in an existing table, replacing fields that
# are provided in the submitted table resource.
def patch_table dataset_id, table_id, patched_table_gapi
  patch_with_backoff = false
  options = {}
  if patched_table_gapi.etag
    options[:header] = { "If-Match" => patched_table_gapi.etag }
    # The patch with etag operation is considered idempotent
    patch_with_backoff = true
  end
  execute backoff: patch_with_backoff do
    service.patch_table @project, dataset_id, table_id,
                        patched_table_gapi, options: options
  end
end

def project_service_account
  service.get_project_service_account project
end

def query_job query_job_gapi
  execute backoff: true do
    service.insert_job @project, query_job_gapi
  end
end

def service
  return mocked_service if mocked_service
  @service ||= begin
    service = API::BigqueryService.new
    service.client_options.application_name = "gcloud-ruby"
    service.client_options.application_version = Google::Cloud::Bigquery::VERSION
    service.client_options.open_timeout_sec = timeout
    service.client_options.read_timeout_sec = timeout
    service.client_options.send_timeout_sec = timeout
    service.request_options.retries = 0 # handle retries in #execute
    service.request_options.header ||= {}
    service.request_options.header["x-goog-api-client"] =
      "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
    service.request_options.query ||= {}
    service.request_options.query["prettyPrint"] = false
    service.request_options.quota_project = @quota_project if @quota_project
    service.authorization = @credentials.client
    service.universe_domain = @universe_domain
    service.root_url = host if host
    begin
      service.verify_universe_domain!
    rescue Google::Apis::UniverseDomainError => e
      # TODO: Create a Google::Cloud::Error subclass for this.
      raise Google::Cloud::Error, e.message
    end
    service
  end
end

##
# @param [Google::Apis::BigqueryV2::Policy] new_policy The policy to set on the table.
def set_table_policy dataset_id, table_id, new_policy
  execute do
    service.set_table_iam_policy table_path(dataset_id, table_id),
                                 API::SetIamPolicyRequest.new(policy: new_policy)
  end
end

def table_metadata_view_type_for str
  return nil if str.nil?
  { "unspecified" => "TABLE_METADATA_VIEW_UNSPECIFIED",
    "basic"       => "BASIC",
    "storage"     => "STORAGE_STATS",
    "full"        => "FULL" }[str.to_s.downcase]
end

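# Illustrative mappings (input is case-insensitive via to_s.downcase;
# unknown values fall through to nil):
#
#   table_metadata_view_type_for "basic"  #=> "BASIC"
#   table_metadata_view_type_for :STORAGE #=> "STORAGE_STATS"
#   table_metadata_view_type_for "other"  #=> nil
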
def table_path dataset_id, table_id
  "projects/#{@project}/datasets/#{dataset_id}/tables/#{table_id}"
end

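# Illustrative output (assuming @project is "my-project"):
#
#   table_path "my_dataset", "my_table"
#   #=> "projects/my-project/datasets/my_dataset/tables/my_table"
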
def test_table_permissions dataset_id, table_id, permissions
  execute do
    service.test_table_iam_permissions table_path(dataset_id, table_id),
                                       API::TestIamPermissionsRequest.new(permissions: permissions)
  end
end

##
# @private
def universe_domain
  service.universe_domain
end

def update_routine dataset_id, routine_id, new_routine_gapi
  update_with_backoff = false
  options = {}
  if new_routine_gapi.etag
    options[:header] = { "If-Match" => new_routine_gapi.etag }
    # The update with etag operation is considered idempotent
    update_with_backoff = true
  end
  execute backoff: update_with_backoff do
    service.update_routine @project, dataset_id, routine_id,
                           new_routine_gapi, options: options
  end
end