diff --git a/src/rust/perftools/profiler/mod.rs b/src/rust/perftools/profiler/mod.rs
index 249d74e65..af13d2458 100644
--- a/src/rust/perftools/profiler/mod.rs
+++ b/src/rust/perftools/profiler/mod.rs
@@ -220,7 +220,7 @@ impl Profiler {
 
         writeln!(
             out,
-            "call_depth,thread_id,function_name,percent_time,cycles_per_call,nanoseconds_per_call"
+            "call_depth,thread_id,function_name,num_calls,percent_time,cycles_per_call,nanoseconds_per_call"
         )?;
         for root in self.roots.iter() {
             root.borrow()
diff --git a/src/rust/perftools/profiler/scope.rs b/src/rust/perftools/profiler/scope.rs
index 51309ef2b..f1e068a6b 100644
--- a/src/rust/perftools/profiler/scope.rs
+++ b/src/rust/perftools/profiler/scope.rs
@@ -157,8 +157,9 @@ impl Scope {
         }
         writeln!(
             out,
-            "{},{},{},{}",
+            "{},{},{},{},{}",
             format!("{},{:?},{}", markers, thread_id, self.name),
+            self.num_calls,
             percent_time,
             duration_sum_secs / (self.num_calls as f64),
             duration_sum_secs / (self.num_calls as f64) * ns_per_cycle,
diff --git a/tools/perf.py b/tools/perf.py
index 7dd0719f1..103649a6c 100644
--- a/tools/perf.py
+++ b/tools/perf.py
@@ -62,7 +62,7 @@ def __get_perf_data(log_dir):
     # with the data for transformations.
     perf_df = pd.read_csv(
         StringIO('\n'.join(collapsed_stacks)),
-        names=['collapsed_stack', 'percent_time', 'cycles_per_call', 'nanoseconds_per_call'])
+        names=['collapsed_stack', 'num_calls', 'percent_time', 'cycles_per_call', 'nanoseconds_per_call'])
 
     # There will be multiple entries for each function in the perf data coming
     # from different files. So, we need to collapse them into a single entry
@@ -103,7 +103,7 @@ def __populate_collapsed_stacks(collapsed_stacks, file):
             # The collapsed stack is a string that contains the function names
             # separated by a semicolon.
             collapsed_stack = ";".join(current_stack)
-            collapsed_stacks.append(f"{collapsed_stack},{row['percent_time']},{row['cycles_per_call']},{row['nanoseconds_per_call']}")
+            collapsed_stacks.append(f"{collapsed_stack},{row['num_calls']},{row['percent_time']},{row['cycles_per_call']},{row['nanoseconds_per_call']}")
 
 
 def __get_file_df(file):
@@ -114,8 +114,7 @@ def __get_file_df(file):
     file_df = pd.read_csv(
         StringIO('\n'.join(lines)),
         delimiter=',',
-        names=['call_depth', 'thread_id', 'function_name', 'percent_time',
-               'cycles_per_call', 'nanoseconds_per_call'])
+        names=['call_depth', 'thread_id', 'function_name', 'num_calls', 'percent_time', 'cycles_per_call', 'nanoseconds_per_call'])
 
     # Number of '+' characters in the call_depth column denotes the depth of
     # the function call.
@@ -140,12 +139,12 @@ def __print_perf_data(perf_df):
 
     # Typically, time is the most important metric to sort by. However, you can
     # sort by any column.
-    sort_by_columns = ['percent_time', 'cycles_per_call', 'nanoseconds_per_call']
+    sort_by_columns = ['num_calls', 'cycles_per_call', 'nanoseconds_per_call', 'percent_time']
 
     # The columns that we are interested in displaying in the table.
     # collapsed_stack is important because it denotes the complete function call
     # stack.
-    columns_to_display = ['collapsed_stack', 'percent_time', 'cycles_per_call', 'nanoseconds_per_call']
+    columns_to_display = ['collapsed_stack', 'num_calls', 'cycles_per_call', 'nanoseconds_per_call', 'percent_time']
 
     # We are interested in the aggregated perf data. We sort the data by the
     # important columns and display only the relevant columns.