paper-trail-gem · jaredbeck · Mar 14, 2019 · Mar 8, 2019 · jaredbeck · Mar 12, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,13 @@ recommendations of [keepachangelog.com](http://keepachangelog.com/).
 - [#1184](/~https://github.com/paper-trail-gem/paper_trail/pull/1184) -
   No need to calculate previous values of skipped attributes
 
+- [#1188](/~https://github.com/paper-trail-gem/paper_trail/pull/1188) -
+  Optimized the memory allocations during the building of every particular
+  Version object. That can help a lot for heavy bulk processing.
+  In additional we advise to use `json[b]` DB types for `object`
+  and `object_changes` Version columns, in order to reach best possible
+  RAM performance.
+
 ## 10.2.0 (2019-01-31)
 
 ### Breaking Changes

diff --git a/lib/paper_trail/events/base.rb b/lib/paper_trail/events/base.rb
@@ -60,15 +60,28 @@ def attribute_changed_in_latest_version?(attr_name)
 
       # @api private
       def nonskipped_attributes_before_change(is_touch)
-        record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])
+        cache_changed_attributes do
+          record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])
 
-        Hash[record_attributes.map do |k, v|
-          if @record.class.column_names.include?(k)
-            [k, attribute_in_previous_version(k, is_touch)]
-          else
-            [k, v]
+          record_attributes.each_key do |k|
+            if @record.class.column_names.include?(k)
+              record_attributes[k] = attribute_in_previous_version(k, is_touch)
+            end
           end
-        end]
+        end
+      end
+
+      # Rails 5.1 changed the API of `ActiveRecord::Dirty`.
+      # @api private
+      def cache_changed_attributes
+        if RAILS_GTE_5_1
+          # Everything works fine as it is
+          yield
+        else
+          # Any particular call to `changed_attributes` produces the huge memory allocation.
+          # Lets use the generic AR workaround for that.
+          @record.send(:cache_changed_attributes) { yield }
+        end
       end
 
       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
@@ -110,18 +123,22 @@ def changed_and_not_ignored
 
       # @api private
       def changed_in_latest_version
-        changes_in_latest_version.keys
+        # Memoized to reduce memory usage
+        @changed_in_latest_version ||= changes_in_latest_version.keys
       end
 
       # Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
       # /~https://github.com/paper-trail-gem/paper_trail/pull/899
       #
       # @api private
       def changes_in_latest_version
-        if @in_after_callback && RAILS_GTE_5_1
-          @record.saved_changes
-        else
-          @record.changes
+        # Memoized to reduce memory usage
+        @changes_in_latest_version ||= begin
+          if @in_after_callback && RAILS_GTE_5_1
+            @record.saved_changes
+          else
+            @record.changes
+          end
         end
       end
 
@@ -202,16 +219,19 @@ def notable_changes
 
       # @api private
       def notably_changed
-        only = @record.paper_trail_options[:only].dup
-        # Remove Hash arguments and then evaluate whether the attributes (the
-        # keys of the hash) should also get pushed into the collection.
-        only.delete_if do |obj|
-          obj.is_a?(Hash) &&
-            obj.each { |attr, condition|
-              only << attr if condition.respond_to?(:call) && condition.call(@record)
-            }
+        # Memoized to reduce memory usage
+        @notably_changed ||= begin
+          only = @record.paper_trail_options[:only].dup
+          # Remove Hash arguments and then evaluate whether the attributes (the
+          # keys of the hash) should also get pushed into the collection.
+          only.delete_if do |obj|
+            obj.is_a?(Hash) &&
+              obj.each { |attr, condition|
+                only << attr if condition.respond_to?(:call) && condition.call(@record)
+              }
+          end
+          only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
         end
-        only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
       end
 
       # Returns hash of attributes (with appropriate attributes serialized),

diff --git a/lib/paper_trail/events/create.rb b/lib/paper_trail/events/create.rb
@@ -13,6 +13,7 @@ class Create < Base
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "create",
           whodunnit: PaperTrail.request.whodunnit
         }

diff --git a/lib/paper_trail/events/update.rb b/lib/paper_trail/events/update.rb
@@ -25,6 +25,7 @@ def initialize(record, in_after_callback, is_touch, force_changes)
       # @api private
       def data
         data = {
+          item: @record,
           event: @record.paper_trail_event || "update",
           whodunnit: PaperTrail.request.whodunnit
         }

diff --git a/lib/paper_trail/record_trail.rb b/lib/paper_trail/record_trail.rb
@@ -67,14 +67,26 @@ def previous_version
 
     def record_create
       return unless enabled?
-      event = Events::Create.new(@record, true)
+
+      build_version_on_create(in_after_callback: true).tap do |version|
+        version.save!
+        # One more Version object has been created. Need to invalidate
+        # the `versions` association cache, otherwise the new version
+        # can stay invisible within that association
+        versions.reset
+      end
+    end
+
+    # @api private
+    def build_version_on_create(in_after_callback:)
+      event = Events::Create.new(@record, in_after_callback)
 
       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_create` but PT-AT still does.
-      data = event.data.merge(data_for_create)
+      data = event.data.merge!(data_for_create)
 
-      versions_assoc = @record.send(@record.class.versions_association_name)
-      versions_assoc.create!(data)
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`
+      @record.class.paper_trail.version_class.new(data)
     end
 
     # PT-AT extends this method to add its transaction id.
@@ -119,20 +131,36 @@ def data_for_destroy
     # paper_trail-association_tracking
     def record_update(force:, in_after_callback:, is_touch:)
       return unless enabled?
+
+      version = build_version_on_update(
+        force: force,
+        in_after_callback: in_after_callback,
+        is_touch: is_touch
+      )
+      return unless version
+
+      if version.save
+        # One more Version object has been created. Need to invalidate
+        # the `versions` association cache, otherwise the new version
+        # can stay invisible within that association
+        versions.reset
+        version
+      else
+        log_version_errors(version, :update)
+      end
+    end
+
+    # @api private
+    def build_version_on_update(force:, in_after_callback:, is_touch:)
       event = Events::Update.new(@record, in_after_callback, is_touch, nil)
       return unless force || event.changed_notably?
 
       # Merge data from `Event` with data from PT-AT. We no longer use
       # `data_for_update` but PT-AT still does.
-      data = event.data.merge(data_for_update)
+      data = event.data.merge!(data_for_update)
 
-      versions_assoc = @record.send(@record.class.versions_association_name)
-      version = versions_assoc.create(data)
-      if version.errors.any?
-        log_version_errors(version, :update)
-      else
-        version
-      end
+      # Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`
+      @record.class.paper_trail.version_class.new(data)
     end
 
     # PT-AT extends this method to add its transaction id.

diff --git a/paper_trail.gemspec b/paper_trail.gemspec
@@ -35,6 +35,7 @@ has been destroyed.
   s.add_development_dependency "byebug", "~> 10.0"
   s.add_development_dependency "ffaker", "~> 2.8"
   s.add_development_dependency "generator_spec", "~> 0.9.4"
+  s.add_development_dependency "memory_profiler", "~> 0.9.12"
   s.add_development_dependency "mysql2", "~> 0.5.2"
   s.add_development_dependency "paper_trail-association_tracking", "~> 2.0.0"
   s.add_development_dependency "pg", "~> 1.0"

diff --git a/spec/paper_trail/model_spec.rb b/spec/paper_trail/model_spec.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "spec_helper"
+require "support/performance_helpers"
 
 RSpec.describe(::PaperTrail, versioning: true) do
   context "a new record" do
@@ -868,4 +869,74 @@
       expect(widget.versions.empty?).to(eq(true))
     end
   end
+
+  context "Memory allocation of" do
+    let(:widget) do
+      Widget.new(
+        name: "Warble",
+        a_text: "The quick brown fox",
+        an_integer: 42,
+        a_float: 153.01,
+        a_decimal: 2.71828,
+        a_boolean: true
+      )
+    end
+
+    before do
+      # Json fields for `object` & `object_changes` attributes is most efficient way
+      # to do the things - this way we will save even more RAM, as well as will skip
+      # the whole YAML serialization
+      allow(PaperTrail::Version).to receive(:object_changes_col_is_json?).and_return(true)
+      allow(PaperTrail::Version).to receive(:object_col_is_json?).and_return(true)
+
+      # Force the loading of all lazy things like class definitions,
+      # in order to get the pure benchmark
+      version_building.call
+    end
+
+    describe "#build_version_on_create" do
+      let(:version_building) do
+        -> { widget.paper_trail.build_version_on_create(in_after_callback: false) }
+      end
+
+      it "is frugal enough" do
+        # Some time ago there was 95kbs..
+        # At the time of commit the test passes with assertion on 17kbs.
+        # Lets assert 20kbs then, to avoid flaky fails.
+        expect(&version_building).to allocate_less_than(20).kilobytes
+      end
+    end
+
+    describe "#build_version_on_update" do
+      let(:widget) do
+        super().tap do |w|
+          w.save!
+          w.attributes = {
+            name: "Dostoyevsky",
+            a_text: "The slow yellow mouse",
+            an_integer: 84,
+            a_float: 306.02,
+            a_decimal: 5.43656,
+            a_boolean: false
+          }
+        end
+      end
+      let(:version_building) do
+        lambda do
+          widget.paper_trail.build_version_on_update(
+            force: false,
+            in_after_callback: false,
+            is_touch: false
+          )
+        end
+      end
+
+      it "is frugal enough" do
+        # Some time ago there was 144kbs..
+        # At the time of commit the test passes with assertion on 27kbs.
+        # Lets assert 35kbs then, to avoid flaky fails.
+        expect(&version_building).to allocate_less_than(35).kilobytes
+      end
+    end
+  end
 end
diff --git a/spec/support/performance_helpers.rb b/spec/support/performance_helpers.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require "memory_profiler"
+
+RSpec::Matchers.define :allocate_less_than do |expected|
+  supports_block_expectations
+
+  chain :bytes do
+    @scale = :bs
+  end
+
+  chain :kilobytes do
+    @scale = :kbs
+  end
+
+  chain :and_print_report do
+    @report = true
+  end
+
+  match do |actual|
+    @scale ||= :bs
+
+    benchmark = MemoryProfiler.report(ignore_files: /rspec/) { actual.call }
+
+    if @report
+      benchmark.pretty_print(detailed_report: true, scale_bytes: true)
+    end
+
+    @allocated = benchmark.total_allocated_memsize
+    @allocated /= 1024 if @scale == :kbs
+    @allocated <= expected
+  end
+
+  failure_message do
+    "expected that example will allocate less than #{expected}#{@scale},"\
+    " but allocated #{@allocated}#{@scale}"
+  end
+end