Skip to content

Commit

Permalink
Lower the memory allocations
Browse files Browse the repository at this point in the history
It prevents the repeating hashes creation during the calculation of `object` and `object_changes`.

The real-life case, discovered with a help of `memory_profiler` gem:
1. It saves `~35Mb` per bulk of 100 typical objects like `User`;
2. It saves `~875Mb` per Sidekiq process full of bulk jobs like P1,
   under default concurrency 25.
  • Loading branch information
stokarenko authored and jaredbeck committed Mar 14, 2019
1 parent 43a0634 commit cf17bc4
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 33 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ recommendations of [keepachangelog.com](http://keepachangelog.com/).
- [#1184](/~https://github.com/paper-trail-gem/paper_trail/pull/1184) -
No need to calculate previous values of skipped attributes

- [#1188](/~https://github.com/paper-trail-gem/paper_trail/pull/1188) -
Optimized the memory allocations during the building of every particular
Version object. That can help a lot for heavy bulk processing.
In additional we advise to use `json[b]` DB types for `object`
and `object_changes` Version columns, in order to reach best possible
RAM performance.

## 10.2.0 (2019-01-31)

### Breaking Changes
Expand Down
62 changes: 41 additions & 21 deletions lib/paper_trail/events/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,28 @@ def attribute_changed_in_latest_version?(attr_name)

# @api private
def nonskipped_attributes_before_change(is_touch)
record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])
cache_changed_attributes do
record_attributes = @record.attributes.except(*@record.paper_trail_options[:skip])

Hash[record_attributes.map do |k, v|
if @record.class.column_names.include?(k)
[k, attribute_in_previous_version(k, is_touch)]
else
[k, v]
record_attributes.each_key do |k|
if @record.class.column_names.include?(k)
record_attributes[k] = attribute_in_previous_version(k, is_touch)
end
end
end]
end
end

# Rails 5.1 changed the API of `ActiveRecord::Dirty`.
# @api private
def cache_changed_attributes
if RAILS_GTE_5_1
# Everything works fine as it is
yield
else
# Any particular call to `changed_attributes` produces the huge memory allocation.
# Lets use the generic AR workaround for that.
@record.send(:cache_changed_attributes) { yield }
end
end

# Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
Expand Down Expand Up @@ -110,18 +123,22 @@ def changed_and_not_ignored

# @api private
def changed_in_latest_version
changes_in_latest_version.keys
# Memoized to reduce memory usage
@changed_in_latest_version ||= changes_in_latest_version.keys
end

# Rails 5.1 changed the API of `ActiveRecord::Dirty`. See
# /~https://github.com/paper-trail-gem/paper_trail/pull/899
#
# @api private
def changes_in_latest_version
if @in_after_callback && RAILS_GTE_5_1
@record.saved_changes
else
@record.changes
# Memoized to reduce memory usage
@changes_in_latest_version ||= begin
if @in_after_callback && RAILS_GTE_5_1
@record.saved_changes
else
@record.changes
end
end
end

Expand Down Expand Up @@ -202,16 +219,19 @@ def notable_changes

# @api private
def notably_changed
only = @record.paper_trail_options[:only].dup
# Remove Hash arguments and then evaluate whether the attributes (the
# keys of the hash) should also get pushed into the collection.
only.delete_if do |obj|
obj.is_a?(Hash) &&
obj.each { |attr, condition|
only << attr if condition.respond_to?(:call) && condition.call(@record)
}
# Memoized to reduce memory usage
@notably_changed ||= begin
only = @record.paper_trail_options[:only].dup
# Remove Hash arguments and then evaluate whether the attributes (the
# keys of the hash) should also get pushed into the collection.
only.delete_if do |obj|
obj.is_a?(Hash) &&
obj.each { |attr, condition|
only << attr if condition.respond_to?(:call) && condition.call(@record)
}
end
only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
end
only.empty? ? changed_and_not_ignored : (changed_and_not_ignored & only)
end

# Returns hash of attributes (with appropriate attributes serialized),
Expand Down
1 change: 1 addition & 0 deletions lib/paper_trail/events/create.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Create < Base
# @api private
def data
data = {
item: @record,
event: @record.paper_trail_event || "create",
whodunnit: PaperTrail.request.whodunnit
}
Expand Down
1 change: 1 addition & 0 deletions lib/paper_trail/events/update.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def initialize(record, in_after_callback, is_touch, force_changes)
# @api private
def data
data = {
item: @record,
event: @record.paper_trail_event || "update",
whodunnit: PaperTrail.request.whodunnit
}
Expand Down
52 changes: 40 additions & 12 deletions lib/paper_trail/record_trail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,26 @@ def previous_version

def record_create
return unless enabled?
event = Events::Create.new(@record, true)

build_version_on_create(in_after_callback: true).tap do |version|
version.save!
# One more Version object has been created. Need to invalidate
# the `versions` association cache, otherwise the new version
# can stay invisible within that association
versions.reset
end
end

# @api private
def build_version_on_create(in_after_callback:)
event = Events::Create.new(@record, in_after_callback)

# Merge data from `Event` with data from PT-AT. We no longer use
# `data_for_create` but PT-AT still does.
data = event.data.merge(data_for_create)
data = event.data.merge!(data_for_create)

versions_assoc = @record.send(@record.class.versions_association_name)
versions_assoc.create!(data)
# Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`
@record.class.paper_trail.version_class.new(data)
end

# PT-AT extends this method to add its transaction id.
Expand Down Expand Up @@ -119,20 +131,36 @@ def data_for_destroy
# paper_trail-association_tracking
def record_update(force:, in_after_callback:, is_touch:)
return unless enabled?

version = build_version_on_update(
force: force,
in_after_callback: in_after_callback,
is_touch: is_touch
)
return unless version

if version.save
# One more Version object has been created. Need to invalidate
# the `versions` association cache, otherwise the new version
# can stay invisible within that association
versions.reset
version
else
log_version_errors(version, :update)
end
end

# @api private
def build_version_on_update(force:, in_after_callback:, is_touch:)
event = Events::Update.new(@record, in_after_callback, is_touch, nil)
return unless force || event.changed_notably?

# Merge data from `Event` with data from PT-AT. We no longer use
# `data_for_update` but PT-AT still does.
data = event.data.merge(data_for_update)
data = event.data.merge!(data_for_update)

versions_assoc = @record.send(@record.class.versions_association_name)
version = versions_assoc.create(data)
if version.errors.any?
log_version_errors(version, :update)
else
version
end
# Pure `version_class.new` reduces memory usage compared to `versions_assoc.build`
@record.class.paper_trail.version_class.new(data)
end

# PT-AT extends this method to add its transaction id.
Expand Down
1 change: 1 addition & 0 deletions paper_trail.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ has been destroyed.
s.add_development_dependency "byebug", "~> 10.0"
s.add_development_dependency "ffaker", "~> 2.8"
s.add_development_dependency "generator_spec", "~> 0.9.4"
s.add_development_dependency "memory_profiler", "~> 0.9.12"
s.add_development_dependency "mysql2", "~> 0.5.2"
s.add_development_dependency "paper_trail-association_tracking", "~> 2.0.0"
s.add_development_dependency "pg", "~> 1.0"
Expand Down
71 changes: 71 additions & 0 deletions spec/paper_trail/model_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require "spec_helper"
require "support/performance_helpers"

RSpec.describe(::PaperTrail, versioning: true) do
context "a new record" do
Expand Down Expand Up @@ -868,4 +869,74 @@
expect(widget.versions.empty?).to(eq(true))
end
end

context "Memory allocation of" do
let(:widget) do
Widget.new(
name: "Warble",
a_text: "The quick brown fox",
an_integer: 42,
a_float: 153.01,
a_decimal: 2.71828,
a_boolean: true
)
end

before do
# Json fields for `object` & `object_changes` attributes is most efficient way
# to do the things - this way we will save even more RAM, as well as will skip
# the whole YAML serialization
allow(PaperTrail::Version).to receive(:object_changes_col_is_json?).and_return(true)
allow(PaperTrail::Version).to receive(:object_col_is_json?).and_return(true)

# Force the loading of all lazy things like class definitions,
# in order to get the pure benchmark
version_building.call
end

describe "#build_version_on_create" do
let(:version_building) do
-> { widget.paper_trail.build_version_on_create(in_after_callback: false) }
end

it "is frugal enough" do
# Some time ago there was 95kbs..
# At the time of commit the test passes with assertion on 17kbs.
# Lets assert 20kbs then, to avoid flaky fails.
expect(&version_building).to allocate_less_than(20).kilobytes
end
end

describe "#build_version_on_update" do
let(:widget) do
super().tap do |w|
w.save!
w.attributes = {
name: "Dostoyevsky",
a_text: "The slow yellow mouse",
an_integer: 84,
a_float: 306.02,
a_decimal: 5.43656,
a_boolean: false
}
end
end
let(:version_building) do
lambda do
widget.paper_trail.build_version_on_update(
force: false,
in_after_callback: false,
is_touch: false
)
end
end

it "is frugal enough" do
# Some time ago there was 144kbs..
# At the time of commit the test passes with assertion on 27kbs.
# Lets assert 35kbs then, to avoid flaky fails.
expect(&version_building).to allocate_less_than(35).kilobytes
end
end
end
end
38 changes: 38 additions & 0 deletions spec/support/performance_helpers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true

require "memory_profiler"

RSpec::Matchers.define :allocate_less_than do |expected|
supports_block_expectations

chain :bytes do
@scale = :bs
end

chain :kilobytes do
@scale = :kbs
end

chain :and_print_report do
@report = true
end

match do |actual|
@scale ||= :bs

benchmark = MemoryProfiler.report(ignore_files: /rspec/) { actual.call }

if @report
benchmark.pretty_print(detailed_report: true, scale_bytes: true)
end

@allocated = benchmark.total_allocated_memsize
@allocated /= 1024 if @scale == :kbs
@allocated <= expected
end

failure_message do
"expected that example will allocate less than #{expected}#{@scale},"\
" but allocated #{@allocated}#{@scale}"
end
end

0 comments on commit cf17bc4

Please sign in to comment.