class ActiveRecord::Base
def self.synchronize(instances, keys=[self.primary_key])
posts.first.address # => "1245 Foo Ln" instead of whatever it was
Post.synchronize posts, [:name] # queries on the :name column and not the :id column
<.. out of system changes occur to change the address of author 'Zach' to 1245 Foo Ln ..>
posts = Post.find_by_author("Zach")
# Synchronizing using custom key fields
posts.first.author # => "Zachary" instead of Zach
Post.synchronize posts
<.. out of system changes occur to change author name from Zach to Zachary..>
posts = Post.find_by_author("Zach")
# Synchronizing existing models by matching on the primary key field
== Examples
instances rather than sending one query for each instance
This uses one query for all instance updates and then updates existing
ActiveRecord instance but it is intended for use on multiple instances.
from the database. This is like calling reload on an individual
Synchronizes the passed in ActiveRecord instances with data
# Refreshes the passed in +instances+ from the database with a single
# query instead of issuing one reload per instance.
#
# instances - the model objects to refresh. The lookup is performed through
#             the class of the first element.
# keys      - the column name, or Array of column names, used to match an
#             in-memory instance to a freshly loaded row.
#
# Instances without a matching row are left untouched. Returns +instances+,
# or nil when +instances+ is empty.
def self.synchronize(instances, keys=[self.primary_key])
  return if instances.empty?

  # Accept a single column name as well as a list of them.
  keys = Array(keys)

  # Constrain every key column to the values currently held in memory.
  conditions = {}
  keys.each { |key| conditions[key] = instances.map(&key.to_s.to_sym) }
  order = keys.map { |key| "#{key} ASC" }.join(",")

  klass = instances.first.class
  fresh_instances = klass.find( :all, :conditions=>conditions, :order=>order )

  instances.each do |instance|
    matched_instance = fresh_instances.detect do |fresh_instance|
      keys.all? { |key| fresh_instance.send(key) == instance.send(key) }
    end
    next unless matched_instance

    instance.clear_aggregation_cache
    instance.clear_association_cache
    instance.instance_variable_set '@attributes', matched_instance.attributes
    # Since the instance now accurately reflects the record in
    # the database, ensure that instance.persisted? is true.
    instance.instance_variable_set '@new_record', false
    instance.instance_variable_set '@destroyed', false
  end
end
def add_special_rails_stamps( column_names, array_of_attributes, options )
# Adds the automatic timestamp columns to the data about to be imported.
#
# For every entry of AREXT_RAILS_COLUMNS[:create] and
# AREXT_RAILS_COLUMNS[:update] whose column exists on this model, the
# entry's block is called once and the resulting value is written into
# every row of +array_of_attributes+ (overwriting the column when it is
# already listed in +column_names+, appending it otherwise).
#
# Update columns are additionally registered in
# options[:on_duplicate_key_update] when the adapter supports it, so they
# are refreshed on duplicate-key updates as well.
#
# +column_names+, +array_of_attributes+ and +options+ are modified in place.
def add_special_rails_stamps( column_names, array_of_attributes, options )
  AREXT_RAILS_COLUMNS[:create].each_pair do |key, blk|
    next unless self.column_names.include?(key)
    stamp_import_column( column_names, array_of_attributes, key, blk.call )
  end

  AREXT_RAILS_COLUMNS[:update].each_pair do |key, blk|
    next unless self.column_names.include?(key)
    stamp_import_column( column_names, array_of_attributes, key, blk.call )

    next unless supports_on_duplicate_key_update?

    updates = options[:on_duplicate_key_update]
    case updates
    when Array
      # Skip columns that are already listed so an options array that is
      # reused across imports does not grow a duplicate entry per call.
      updates << key.to_sym unless updates.include?(key.to_sym)
    when Hash
      updates[key.to_sym] = key.to_sym
    when nil, false
      options[:on_duplicate_key_update] = [ key.to_sym ]
    end
  end
end

# Writes +value+ into the +key+ column of every row, appending the column
# to +column_names+ (and to each row) when it is not part of the import yet.
def stamp_import_column( column_names, array_of_attributes, key, value ) # :nodoc:
  index = column_names.index(key)
  if index
    array_of_attributes.each { |arr| arr[index] = value }
  else
    column_names << key
    array_of_attributes.each { |arr| arr << value }
  end
end
def establish_connection_with_activerecord_import(*args)
# Wraps the original establish_connection: delegates to it with the same
# arguments, then runs the :active_record_connection_established load hooks
# with the connection pool. Returns whatever the load hooks call returns.
def establish_connection_with_activerecord_import(*args)
  establish_connection_without_activerecord_import(*args)

  pool = connection_pool
  ActiveSupport.run_load_hooks(:active_record_connection_established, pool)
end
def import( *args )
* failed_instances - an array of objects that failed validation and were not committed to the database. An empty array if no validation is performed.
This returns an object which responds to +failed_instances+ and +num_inserts+.
= Returns
BlogPost.import columns, attributes, :on_duplicate_key_update=>{ :title => :title }
model. Below is an example:
control over what fields are updated with what attributes on your
to model attribute name mappings. This gives you finer grained
The :on_duplicate_key_update option can be a hash of column name
==== Using A Hash
BlogPost.import columns, values, :on_duplicate_key_update=>[ :date_modified, :content, :author ]
a duplicate record is found. Below is an example:
names. The column names are the only fields that are updated if
The :on_duplicate_key_update option can be an array of column
==== Using an Array
The :on_duplicate_key_update option can be either an Array or a Hash.
== On Duplicate Key Update (MySQL only)
puts posts.first.persisted? # => true
BlogPost.import posts, :synchronize => posts, :synchronize_keys => [:title]
posts = [BlogPost.new(:title => "Foo"), BlogPost.new(:title => "Bar")]
# Example synchronizing unsaved/new instances in memory by using a unique imported field
puts post.author_name # => 'yoda'
BlogPost.import posts, :synchronize=>[ post ]
values = [ [ 'yoda', 'test post' ] ]
columns = [ :author_name, :title ]
puts post.author_name # => 'zdennis'
post = BlogPost.find_by_author_name( 'zdennis' )
# Example synchronizing existing instances in memory
BlogPost.import( columns, values, :validate => false )
values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
columns = [ :author_name, :title ]
# Example using column_names, array_of_value and options
BlogPost.import columns, values
values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
columns = [ :author_name, :title ]
# Example using column_names and array_of_values
BlogPost.import posts
BlogPost.new :author_name=>'Zach Dennis', :title=>'AREXT3' ]
BlogPost.new :author_name=>'Zach Dennis', :title=>'AREXT2',
posts = [ BlogPost.new :author_name=>'Zach Dennis', :title=>'AREXT',
# Example using array of model objects
class BlogPost < ActiveRecord::Base ; end
== Examples
(if false) even if record timestamps is enabled in ActiveRecord::Base
* +timestamps+ - true|false, tells import to not add timestamps \
existing model instances in memory with updates from the import.
that you are currently importing data into. This synchronizes
* +synchronize+ - an array of ActiveRecord instances for the model
Key Update below.
use MySQL's ON DUPLICATE KEY UPDATE ability. See On Duplicate\
* +on_duplicate_key_update+ - an Array or Hash, tells import to \
ActiveRecord validations. Validations are enforced by default.
* +validate+ - true|false, tells import whether or not to use \
== Options
below for what +options+ are available.
parameter, +options+, is a hash. This is optional. Please see
The first two parameters are the same as the above form. The third
==== Model.import column_names, array_of_values, options
the order of the +column_names+.
record. The order of values in each subarray should match up to
arrays. Each subarray is a single set of values for a new
The second parameter, +array_of_values+, is an array of
strings which specify the columns that you want to update.
The first parameter +column_names+ is an array of symbols or
==== Model.import column_names, array_of_values
objects that you want updated.
With this form you can call _import_ passing in an array of model
==== Model.import array_of_models
Model.import column_names, array_of_values, options
Model.import column_names, array_of_values
Model.import array_of_models
== Usage
performing the import.
the ActiveRecord::Callbacks during creation/modification while
This can be used with or without validations. It does not utilize
inserted.
about having ActiveRecord objects returned for each record
you want to create more than one record at a time and do not care
ActiveRecord::Base#save multiple times. This method works well if
This is more efficient than using ActiveRecord::Base#create or
Imports a collection of values to the database.
# Imports a collection of values to the database. Accepts either an array
# of model objects (optionally preceded by the column names to import) or
# an array of column names followed by an array of value rows; a trailing
# Hash is treated as options.
#
# Options read here: :validate (default true), :timestamps (default true),
# :synchronize and :synchronize_keys. The remaining options are handed on
# to the import_with_validations / import_without_validations_or_callbacks
# helpers.
#
# Returns a result object responding to +failed_instances+ and
# +num_inserts+. Raises ArgumentError when the arguments match none of the
# supported forms.
#
# The caller's column list and value rows are copied before any column is
# appended, so the arguments passed in are never modified.
def import( *args )
  options = { :validate=>true, :timestamps=>true }
  options.merge!( args.pop ) if args.last.is_a? Hash
  is_validating = options.delete( :validate )

  if args.last.is_a?( Array ) && args.last.first.is_a?( ActiveRecord::Base )
    # assume array of model objects
    if args.length == 2
      models = args.last
      column_names = args.first.dup
    else
      models = args.first
      column_names = self.column_names.dup
    end

    # Every model is read unconditionally; the original source notes that
    # guarding this with model.new_record? segfaulted sqlite.so.
    array_of_attributes = models.map do |model|
      column_names.map { |name| model.send( "#{name}_before_type_cast" ) }
    end
  elsif args.last.is_a?( Array ) && args.last.empty?
    # supports empty array
    return ActiveRecord::Import::Result.new([], 0)
  elsif args.size == 2 && args.first.is_a?( Array ) && args.last.is_a?( Array )
    # supports 2-element array and array; copy the column list and every
    # row because columns may be appended to them below
    column_names = args.first.dup
    array_of_attributes = args.last.map { |row| row.dup }
  else
    raise ArgumentError.new( "Invalid arguments!" )
  end

  # Force the primary key col into the insert if it's not
  # on the list and we are using a sequence and stuff a nil
  # value for it into each row so the sequencer will fire later
  if !column_names.include?(primary_key) && connection.prefetch_primary_key? && sequence_name
    column_names << primary_key
    array_of_attributes.each { |a| a << nil }
  end

  # record timestamps unless disabled in ActiveRecord::Base
  if record_timestamps && options.delete( :timestamps )
    add_special_rails_stamps column_names, array_of_attributes, options
  end

  return_obj = if is_validating
    import_with_validations( column_names, array_of_attributes, options )
  else
    num_inserts = import_without_validations_or_callbacks( column_names, array_of_attributes, options )
    ActiveRecord::Import::Result.new([], num_inserts)
  end

  if options[:synchronize]
    sync_keys = options[:synchronize_keys] || [self.primary_key]
    synchronize( options[:synchronize], sync_keys)
  end

  return_obj.num_inserts = 0 if return_obj.num_inserts.nil?
  return_obj
end
def import_from_table( options ) # :nodoc:
TODO import_from_table needs to be implemented.
# Placeholder for importing from another table. Not implemented yet:
# +options+ is accepted and ignored, and nil is returned.
def import_from_table( options ) # :nodoc:
  # TODO: import_from_table needs to be implemented.
end
def import_with_validations( column_names, array_of_attributes, options={} )
ActiveRecord::Base.import for more information on +column_names+, +array_of_attributes+ and +options+.
+num_inserts+ is the number of inserts it took to import the data. See
+failed_instances+ is an array of instances that failed validations.
object with the methods +failed_instances+ and +num_inserts+.
given the passed in +options+ Hash with validations. Returns an
Imports the passed in +column_names+ and +array_of_attributes+
# Validates every row before importing it. A model instance is built for
# each row of +array_of_attributes+; rows whose instance is invalid are
# removed from +array_of_attributes+ (in place) and the instance is
# collected as a failure.
#
# The remaining rows are imported without validations or callbacks, unless
# nothing is left or options[:all_or_none] is set and at least one row
# failed, in which case nothing is inserted.
#
# Returns a result object built from the failed instances and the number
# of inserts.
def import_with_validations( column_names, array_of_attributes, options={} )
  failed_instances = []

  rows_as_hashes = validations_array_for_column_names_and_attributes( column_names, array_of_attributes )

  # The position is tracked so a failing row can be blanked out of
  # array_of_attributes and dropped by the compact! below.
  rows_as_hashes.each_with_index do |attrs, position|
    candidate = new do |model|
      attrs.each_pair { |attr_name, attr_value| model.send("#{attr_name}=", attr_value) }
    end
    next if candidate.valid?

    array_of_attributes[ position ] = nil
    failed_instances << candidate
  end
  array_of_attributes.compact!

  skip_insert = array_of_attributes.empty? || (options[:all_or_none] && failed_instances.any?)
  num_inserts = if skip_insert
    0
  else
    import_without_validations_or_callbacks( column_names, array_of_attributes, options )
  end

  ActiveRecord::Import::Result.new(failed_instances, num_inserts)
end
def import_without_validations_or_callbacks( column_names, array_of_attributes, options={} )
information on +column_names+, +array_of_attributes+ and +options+.
validations or callbacks. See ActiveRecord::Base.import for more
of insert operations it took to create these records without
given the passed in +options+ Hash. This will return the number
Imports the passed in +column_names+ and +array_of_attributes+
# Builds and executes the INSERT statement(s) for +column_names+ and
# +array_of_attributes+ without running validations or callbacks.
#
# Attributes of the current scope that are not already among the columns
# are appended to the column list and to every row (rows are modified in
# place). Raises ActiveRecord::Import::MissingColumnError when a column
# name is not present in columns_hash.
#
# Options read here: :ignore switches the statement to INSERT IGNORE; the
# whole options hash is also handed to connection.post_sql_statements.
#
# Returns the number reported by connection.insert_many when the adapter
# supports import, otherwise the number of single-row statements executed.
def import_without_validations_or_callbacks( column_names, array_of_attributes, options={} )
  column_names = column_names.map(&:to_sym)
  scope_columns, scope_values = scope_attributes.to_a.transpose

  unless scope_columns.blank?
    scope_columns.zip(scope_values).each do |name, value|
      next if column_names.include?(name.to_sym)
      column_names << name
      array_of_attributes.each { |attrs| attrs << value }
    end
  end

  columns = column_names.each_with_index.map do |name, i|
    column = columns_hash[name.to_s]
    raise ActiveRecord::Import::MissingColumnError.new(name.to_s, i) if column.nil?
    column
  end

  columns_sql = "(#{column_names.map{|name| connection.quote_column_name(name) }.join(',')})"
  insert_sql = "INSERT #{options[:ignore] ? 'IGNORE ':''}INTO #{quoted_table_name} #{columns_sql} VALUES "
  values_sql = values_sql_for_columns_and_attributes(columns, array_of_attributes)

  if supports_import?
    # generate the sql
    post_sql_statements = connection.post_sql_statements( quoted_table_name, options )

    # perform the inserts; the label names the model being imported
    # (self.class.name would always read "Class" at class level)
    connection.insert_many( [ insert_sql, post_sql_statements ].flatten,
                            values_sql,
                            "#{self.name} Create Many Without Validations Or Callbacks" )
  else
    # no multi-row support: one INSERT per row
    values_sql.each { |values| connection.execute(insert_sql + values) }
    values_sql.size
  end
end
def supports_import?(*args)
Returns true if the current database connection adapter supports import functionality, otherwise returns false.
# Asks the current connection whether it supports import, forwarding any
# arguments. Answers false when asking the connection raises NoMethodError.
def supports_import?(*args)
  begin
    connection.supports_import?(*args)
  rescue NoMethodError
    false
  end
end
def supports_on_duplicate_key_update?
supports on duplicate key update functionality, otherwise returns false.
Returns true if the current database connection adapter
# Asks the current connection whether it supports on duplicate key update.
# Answers false when asking the connection raises NoMethodError.
def supports_on_duplicate_key_update?
  begin
    connection.supports_on_duplicate_key_update?
  rescue NoMethodError
    false
  end
end
def synchronize(instances, key=[ActiveRecord::Base.primary_key])
# Instance-level convenience wrapper: delegates to the class-level
# synchronize of the receiver's class.
#
# instances - the model objects to refresh.
# key       - the column names used to match instances to rows. Defaults to
#             the primary key of the receiver's own class, matching the
#             default of the class-level method.
def synchronize(instances, key=[self.class.primary_key])
  self.class.synchronize(instances, key)
end
def validations_array_for_column_names_and_attributes( column_names, array_of_attributes ) # :nodoc:
Returns an Array of Hashes for the passed in +column_names+ and +array_of_attributes+.
# Turns every row of +array_of_attributes+ into a Hash keyed by the entry
# of +column_names+ found at the same position as each value.
# Returns an Array with one Hash per row.
def validations_array_for_column_names_and_attributes( column_names, array_of_attributes ) # :nodoc:
  array_of_attributes.map do |row|
    pairs = []
    row.each_with_index { |value, position| pairs << [column_names[position], value] }
    Hash[pairs]
  end
end
def values_sql_for_columns_and_attributes(columns, array_of_attributes) # :nodoc:
and +array_of_attributes+.
Returns the SQL VALUES for an INSERT statement given the passed in +columns+
# Builds one parenthesised, comma-separated SQL value list per row of
# +array_of_attributes+; +columns+ supplies the column object for each
# position in a row. Returns an Array of Strings.
def values_sql_for_columns_and_attributes(columns, array_of_attributes) # :nodoc:
  # Look the connection up once; the original notes that re-fetching it
  # inside this hot loop costs a lot of time on large imports.
  conn = connection

  array_of_attributes.map do |row|
    quoted = row.each_with_index.map do |value, position|
      column = columns[position]

      # sequence_name is checked last, only after the cheaper tests pass,
      # because it is costly to query.
      if value.nil? && column.name == primary_key && !sequence_name.blank?
        conn.next_value_for_sequence(sequence_name)
      elsif serialized_attributes.include?(column.name)
        # serialized columns are dumped through their coder before quoting
        conn.quote(serialized_attributes[column.name].dump(value), column)
      else
        conn.quote(value, column)
      end
    end

    "(#{quoted.join(',')})"
  end
end