module ActiveRecord::Batches
def act_on_ignored_order(error_on_ignore)
  raise_error = (error_on_ignore.nil? ? ActiveRecord.error_on_ignored_order : error_on_ignore)

  if raise_error
    raise ArgumentError.new(ORDER_IGNORE_MESSAGE)
  elsif logger
    logger.warn(ORDER_IGNORE_MESSAGE)
  end
end

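This helper fires from #in_batches when the relation already carries an ORDER BY clause, which batching has to ignore. A minimal sketch (not part of the original documentation), using the Person model from the examples below:

  # With error_on_ignore: true the ignored :created_at order raises an
  # ArgumentError; with the default application config it is only logged.
  Person.order(:created_at).find_each(error_on_ignore: true) do |person|
    person.do_awesome_stuff # never reached, the order check raises first
  end
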
def apply_finish_limit(relation, finish, order)
  relation.where(predicate_builder[primary_key, finish, order == :desc ? :gteq : :lteq])
end

def apply_limits(relation, start, finish, order)
  relation = apply_start_limit(relation, start, order) if start
  relation = apply_finish_limit(relation, finish, order) if finish
  relation
end

def apply_start_limit(relation, start, order)
  relation.where(predicate_builder[primary_key, start, order == :desc ? :lteq : :gteq])
end

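Taken together, apply_limits, apply_start_limit, and apply_finish_limit translate the +start+ and +finish+ options into primary-key range conditions; with order: :desc the comparisons flip, so +start+ acts as the upper bound. A rough sketch of the resulting SQL, assuming a +people+ table with an integer +id+ primary key:

  Person.find_each(start: 10_000, finish: 19_999) do |person|
    # Each batch query includes, approximately:
    #   WHERE "people"."id" >= 10000 AND "people"."id" <= 19999
    #   ORDER BY "people"."id" ASC LIMIT 1000
    person.do_awesome_stuff
  end
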
def batch_order(order)
  table[primary_key].public_send(order)
end

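A short illustration of what this returns for the same +people+ table (these are private internals, shown only as a sketch):

  batch_order(:asc)   # Arel node rendering as "people"."id" ASC
  batch_order(:desc)  # Arel node rendering as "people"."id" DESC
  # #in_batches passes the result to +reorder+, replacing any ORDER BY
  # already present on the relation.
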
def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc, &block)

Looping through a collection of records from the database
(using the Scoping::Named::ClassMethods.all method, for example)
is very inefficient since it will try to instantiate all the objects at once.

In that case, batch processing methods allow you to work
with the records in batches, thereby greatly reducing memory consumption.

The #find_each method uses #find_in_batches with a batch size of 1000 (or as
specified by the +:batch_size+ option).

  Person.find_each do |person|
    person.do_awesome_stuff
  end

  Person.where("age > 21").find_each do |person|
    person.party_all_night!
  end

If you do not provide a block to #find_each, it will return an Enumerator
for chaining with other methods:

  Person.find_each.with_index do |person, index|
    person.award_trophy(index + 1)
  end

==== Options

* :batch_size - Specifies the size of the batch. Defaults to 1000.
* :start - Specifies the primary key value to start from, inclusive of the value.
* :finish - Specifies the primary key value to end at, inclusive of the value.
* :error_on_ignore - Overrides the application config to specify if an error should be raised when
  an order is present in the relation.
* :order - Specifies the primary key order (can be +:asc+ or +:desc+). Defaults to +:asc+.

Limits are honored, and if present there is no requirement for the batch
size: it can be less than, equal to, or greater than the limit.

The options +start+ and +finish+ are especially useful if you want
multiple workers dealing with the same processing queue. You can make
worker 1 handle all the records between id 1 and 9999 and worker 2
handle from 10000 and beyond by setting the +:start+ and +:finish+
option on each worker.

  # In worker 1, let's process until 9999 records.
  Person.find_each(finish: 9_999) do |person|
    person.party_all_night!
  end

  # In worker 2, let's process from record 10_000 and onwards.
  Person.find_each(start: 10_000) do |person|
    person.party_all_night!
  end

NOTE: Order can be ascending (:asc) or descending (:desc). It is automatically set to
ascending on the primary key ("id ASC").
This also means that this method only works when the primary key is
orderable (e.g. an integer or string).

NOTE: By its nature, batch processing is subject to race conditions if
other processes are modifying the database.

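The +:order+ option listed above can be combined with the others; for example (a sketch, not from the original documentation, using the same Person model), to walk the table from the highest id down in smaller batches:

  Person.find_each(order: :desc, batch_size: 500) do |person|
    person.party_all_night!
  end
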
def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc, &block)
  if block_given?
    find_in_batches(start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do |records|
      records.each(&block)
    end
  else
    enum_for(:find_each, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do
      relation = self
      apply_limits(relation, start, finish, order).size
    end
  end
end

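Note the size block passed to +enum_for+ in the source above: it is evaluated only if the caller asks the returned Enumerator for its size, at which point it issues a count query instead of loading any records. A small sketch, again assuming the Person model:

  enum = Person.find_each   # builds the Enumerator, no query yet
  enum.size                 # roughly SELECT COUNT(*) FROM "people"
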
def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc)

Yields each batch of records that was found by the find options as
an array.

  Person.where("age > 21").find_in_batches do |group|
    sleep(50) # Make sure it doesn't get too crowded in there!
    group.each { |person| person.party_all_night! }
  end

If you do not provide a block to #find_in_batches, it will return an Enumerator
for chaining with other methods:

  Person.find_in_batches.with_index do |group, batch|
    puts "Processing group ##{batch}"
    group.each(&:recover_from_last_night!)
  end

To be yielded each record one by one, use #find_each instead.

==== Options

* :batch_size - Specifies the size of the batch. Defaults to 1000.
* :start - Specifies the primary key value to start from, inclusive of the value.
* :finish - Specifies the primary key value to end at, inclusive of the value.
* :error_on_ignore - Overrides the application config to specify if an error should be raised when
  an order is present in the relation.
* :order - Specifies the primary key order (can be +:asc+ or +:desc+). Defaults to +:asc+.

Limits are honored, and if present there is no requirement for the batch
size: it can be less than, equal to, or greater than the limit.

The options +start+ and +finish+ are especially useful if you want
multiple workers dealing with the same processing queue. You can make
worker 1 handle all the records between id 1 and 9999 and worker 2
handle from 10000 and beyond by setting the +:start+ and +:finish+
option on each worker.

  # Let's process from record 10_000 on.
  Person.find_in_batches(start: 10_000) do |group|
    group.each { |person| person.party_all_night! }
  end

NOTE: Order can be ascending (:asc) or descending (:desc). It is automatically set to
ascending on the primary key ("id ASC").
This also means that this method only works when the primary key is
orderable (e.g. an integer or string).

NOTE: By its nature, batch processing is subject to race conditions if
other processes are modifying the database.

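As with #find_each, the +:order+ option flips the direction of the walk. A sketch (not from the original documentation) using the Person model from the examples above:

  Person.find_in_batches(batch_size: 2_500, order: :desc) do |group|
    group.each(&:party_all_night!)
  end
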
def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil, order: :asc)
  relation = self
  unless block_given?
    return to_enum(:find_in_batches, start: start, finish: finish, batch_size: batch_size, error_on_ignore: error_on_ignore, order: order) do
      total = apply_limits(relation, start, finish, order).size
      (total - 1).div(batch_size) + 1
    end
  end

  in_batches(of: batch_size, start: start, finish: finish, load: true, error_on_ignore: error_on_ignore, order: order) do |batch|
    yield batch.to_a
  end
end

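The size block in the source above performs a ceiling division, so the Enumerator reports the number of batches rather than the number of records. A worked example with assumed numbers, 2,500 matching people and the default batch size of 1,000:

  total = 2_500
  (total - 1).div(1_000) + 1  # => 3 batches
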
def in_batches(of: 1000, start: nil, finish: nil, load: false, error_on_ignore: nil, order: :asc)

Yields ActiveRecord::Relation objects to work with a batch of records.

  Person.where("age > 21").in_batches do |relation|
    relation.delete_all
    sleep(10) # Throttle the delete queries
  end

If you do not provide a block to #in_batches, it will return a
BatchEnumerator which is enumerable.

  Person.in_batches.each_with_index do |relation, batch_index|
    puts "Processing relation ##{batch_index}"
    relation.delete_all
  end

Examples of calling methods on the returned BatchEnumerator object:

  Person.in_batches.delete_all
  Person.in_batches.update_all(awesome: true)
  Person.in_batches.each_record(&:party_all_night!)

==== Options

* :of - Specifies the size of the batch. Defaults to 1000.
* :load - Specifies if the relation should be loaded. Defaults to false.
* :start - Specifies the primary key value to start from, inclusive of the value.
* :finish - Specifies the primary key value to end at, inclusive of the value.
* :error_on_ignore - Overrides the application config to specify if an error should be raised when
  an order is present in the relation.
* :order - Specifies the primary key order (can be +:asc+ or +:desc+). Defaults to +:asc+.

Limits are honored, and if present there is no requirement for the batch
size: it can be less than, equal to, or greater than the limit.

The options +start+ and +finish+ are especially useful if you want
multiple workers dealing with the same processing queue. You can make
worker 1 handle all the records between id 1 and 9999 and worker 2
handle from 10000 and beyond by setting the +:start+ and +:finish+
option on each worker.

  # Let's process from record 10_000 on.
  Person.in_batches(start: 10_000).update_all(awesome: true)

An example of calling the +where+ query method on the yielded relation:

  Person.in_batches.each do |relation|
    relation.update_all('age = age + 1')
    relation.where('age > 21').update_all(should_party: true)
    relation.where('age <= 21').delete_all
  end

NOTE: If you are going to iterate through each record, you should call
#each_record on the returned BatchEnumerator:

  Person.in_batches.each_record(&:party_all_night!)

NOTE: Order can be ascending (:asc) or descending (:desc). It is automatically set to
ascending on the primary key ("id ASC").
This also means that this method only works when the primary key is
orderable (e.g. an integer or string).

NOTE: By its nature, batch processing is subject to race conditions if
other processes are modifying the database.

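The options above can also be combined on a single call. A sketch (not from the original documentation) with the Person model from the examples above; +load: true+ makes each yielded relation arrive with its records already loaded, which helps when the block iterates over them anyway:

  Person.in_batches(of: 500, load: true, order: :desc) do |relation|
    relation.each(&:party_all_night!)
  end
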
def in_batches(of: 1000, start: nil, finish: nil, load: false, error_on_ignore: nil, order: :asc)
  relation = self

  unless block_given?
    return BatchEnumerator.new(of: of, start: start, finish: finish, relation: self)
  end

  unless [:asc, :desc].include?(order)
    raise ArgumentError, ":order must be :asc or :desc, got #{order.inspect}"
  end

  if arel.orders.present?
    act_on_ignored_order(error_on_ignore)
  end

  batch_limit = of
  if limit_value
    remaining = limit_value
    batch_limit = remaining if remaining < batch_limit
  end

  relation = relation.reorder(batch_order(order)).limit(batch_limit)
  relation = apply_limits(relation, start, finish, order)
  relation.skip_query_cache! # Retaining the results in the query cache would undermine the point of batching
  batch_relation = relation

  loop do
    if load
      records = batch_relation.records
      ids = records.map(&:id)
      yielded_relation = where(primary_key => ids)
      yielded_relation.load_records(records)
    else
      ids = batch_relation.pluck(primary_key)
      yielded_relation = where(primary_key => ids)
    end

    break if ids.empty?

    primary_key_offset = ids.last
    raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset

    yield yielded_relation

    break if ids.length < batch_limit

    if limit_value
      remaining -= ids.length

      if remaining == 0
        # Saves a useless iteration when the limit is a multiple of the
        # batch size.
        break
      elsif remaining < batch_limit
        relation = relation.limit(remaining)
      end
    end

    batch_relation = relation.where(
      predicate_builder[primary_key, primary_key_offset, order == :desc ? :lt : :gt]
    )
  end
end

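The loop in the source above implements keyset-style pagination: each pass selects the next slice by comparing against the last primary key seen, rather than using OFFSET, so later batches do not get slower. Roughly the queries issued for the default options (without +load+), assuming a +people+ table:

  Person.in_batches(of: 1_000) do |relation|
    # pass 1: SELECT "people"."id" FROM "people"
    #           ORDER BY "people"."id" ASC LIMIT 1000
    # pass 2: SELECT "people"."id" FROM "people"
    #           WHERE "people"."id" > <last id of pass 1>
    #           ORDER BY "people"."id" ASC LIMIT 1000
    # The yielded relation itself runs WHERE "people"."id" IN (...) when used.
    relation.update_all(awesome: true)
  end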