Skip to content

Commit

Permalink
Rollup merge of rust-lang#67889 - Zoxc:parallel-cgus, r=michaelwoerister
Browse files Browse the repository at this point in the history
Compile some CGUs in parallel at the start of codegen

This brings the compilation time for `syntex_syntax` from 11.542s to 10.453s with 6 threads in non-incremental debug mode. Just compiling `n` CGUs in parallel at the beginning of codegen seems sufficient to get rid of the staircase effect, at least for `syntex_syntax`.

Based on rust-lang#67777.

r? @michaelwoerister
cc @alexcrichton @Mark-Simulacrum
  • Loading branch information
Centril authored Jan 11, 2020
2 parents cd47af1 + 69bacd0 commit 7b741fb
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 22 deletions.
10 changes: 4 additions & 6 deletions src/librustc_codegen_llvm/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! but one `llvm::Type` corresponds to many `Ty`s; for instance, `tup(int, int,
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
use super::{LlvmCodegenBackend, ModuleLlvm};
use super::ModuleLlvm;

use crate::builder::Builder;
use crate::common;
Expand All @@ -29,7 +29,6 @@ use rustc::middle::exported_symbols;
use rustc::mir::mono::{Linkage, Visibility};
use rustc::session::config::DebugInfo;
use rustc::ty::TyCtxt;
use rustc_codegen_ssa::back::write::submit_codegened_module_to_llvm;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
Expand Down Expand Up @@ -100,8 +99,7 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> {
pub fn compile_codegen_unit(
tcx: TyCtxt<'tcx>,
cgu_name: Symbol,
tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
) {
) -> (ModuleCodegen<ModuleLlvm>, u64) {
let prof_timer = tcx.prof.generic_activity("codegen_module");
let start_time = Instant::now();

Expand All @@ -115,8 +113,6 @@ pub fn compile_codegen_unit(
// the time we needed for codegenning it.
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;

submit_codegened_module_to_llvm(&LlvmCodegenBackend(()), tx_to_llvm_workers, module, cost);

fn module_codegen(tcx: TyCtxt<'_>, cgu_name: Symbol) -> ModuleCodegen<ModuleLlvm> {
let cgu = tcx.codegen_unit(cgu_name);
// Instantiate monomorphizations without filling out definitions yet...
Expand Down Expand Up @@ -164,6 +160,8 @@ pub fn compile_codegen_unit(
kind: ModuleKind::Regular,
}
}

(module, cost)
}

pub fn set_link_section(llval: &Value, attrs: &CodegenFnAttrs) {
Expand Down
6 changes: 3 additions & 3 deletions src/librustc_codegen_llvm/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#![feature(link_args)]
#![feature(static_nobundle)]
#![feature(trusted_len)]
#![recursion_limit = "256"]

use back::write::{create_informational_target_machine, create_target_machine};
use rustc_span::symbol::Symbol;
Expand Down Expand Up @@ -108,9 +109,8 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
&self,
tcx: TyCtxt<'_>,
cgu_name: Symbol,
tx: &std::sync::mpsc::Sender<Box<dyn Any + Send>>,
) {
base::compile_codegen_unit(tcx, cgu_name, tx);
) -> (ModuleCodegen<ModuleLlvm>, u64) {
base::compile_codegen_unit(tcx, cgu_name)
}
fn target_machine_factory(
&self,
Expand Down
86 changes: 77 additions & 9 deletions src/librustc_codegen_ssa/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
use crate::back::write::{
start_async_codegen, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm,
OngoingCodegen,
start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm,
submit_pre_lto_module_to_llvm, OngoingCodegen,
};
use crate::common::{IntPredicate, RealPredicate, TypeKind};
use crate::meth;
Expand All @@ -40,6 +40,7 @@ use rustc::ty::{self, Instance, Ty, TyCtxt};
use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::profiling::print_time_passes_entry;
use rustc_data_structures::sync::{par_iter, Lock, ParallelIterator};
use rustc_hir as hir;
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
use rustc_index::vec::Idx;
Expand Down Expand Up @@ -606,20 +607,83 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
codegen_units
};

let mut total_codegen_time = Duration::new(0, 0);
let total_codegen_time = Lock::new(Duration::new(0, 0));

for cgu in codegen_units.into_iter() {
// The non-parallel compiler can only translate codegen units to LLVM IR
// on a single thread, leading to a staircase effect where the N LLVM
// threads have to wait on the single codegen thread to generate work
// for them. The parallel compiler does not have this restriction, so
// we can pre-load the LLVM queue in parallel before handing off
// coordination to the `OngoingCodegen` scheduler.
//
// This is likely a temporary measure. Once we don't have to support the
// non-parallel compiler anymore, we can compile CGUs end-to-end in
// parallel and get rid of the complicated scheduling logic.
let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
if cfg!(parallel_compiler) {
tcx.sess.time("compile_first_CGU_batch", || {
// Try to find one CGU to compile per thread.
let cgus: Vec<_> = cgu_reuse
.iter()
.enumerate()
.filter(|&(_, reuse)| reuse == &CguReuse::No)
.take(tcx.sess.threads())
.collect();

// Compile the found CGUs in parallel.
par_iter(cgus)
.map(|(i, _)| {
let start_time = Instant::now();
let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
let mut time = total_codegen_time.lock();
*time += start_time.elapsed();
(i, module)
})
.collect()
})
} else {
FxHashMap::default()
}
};

let mut cgu_reuse = Vec::new();
let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;

for (i, cgu) in codegen_units.iter().enumerate() {
ongoing_codegen.wait_for_signal_to_codegen_item();
ongoing_codegen.check_for_errors(tcx.sess);

let cgu_reuse = determine_cgu_reuse(tcx, &cgu);
// Do some setup work in the first iteration
if pre_compiled_cgus.is_none() {
// Calculate the CGU reuse
cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
});
// Pre-compile some CGUs
pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse));
}

let cgu_reuse = cgu_reuse[i];
tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);

match cgu_reuse {
CguReuse::No => {
let start_time = Instant::now();
backend.compile_codegen_unit(tcx, cgu.name(), &ongoing_codegen.coordinator_send);
total_codegen_time += start_time.elapsed();
let (module, cost) =
if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
cgu
} else {
let start_time = Instant::now();
let module = backend.compile_codegen_unit(tcx, cgu.name());
let mut time = total_codegen_time.lock();
*time += start_time.elapsed();
module
};
submit_codegened_module_to_llvm(
&backend,
&ongoing_codegen.coordinator_send,
module,
cost,
);
false
}
CguReuse::PreLto => {
Expand Down Expand Up @@ -652,7 +716,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(

// Since the main thread is sometimes blocked during codegen, we keep track
// of the -Ztime-passes output manually.
print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time);
print_time_passes_entry(
tcx.sess.time_passes(),
"codegen_to_LLVM_IR",
total_codegen_time.into_inner(),
);

::rustc_incremental::assert_module_sources::assert_module_sources(tcx);

Expand Down
9 changes: 5 additions & 4 deletions src/librustc_codegen_ssa/traits/backend.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::write::WriteBackendMethods;
use super::CodegenObject;
use crate::ModuleCodegen;

use rustc::middle::cstore::EncodedMetadata;
use rustc::session::{config, Session};
Expand All @@ -10,7 +11,6 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend;
use rustc_span::symbol::Symbol;
use syntax::expand::allocator::AllocatorKind;

use std::sync::mpsc;
use std::sync::Arc;

pub trait BackendTypes {
Expand All @@ -34,7 +34,7 @@ impl<'tcx, T> Backend<'tcx> for T where
{
}

pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send {
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync {
fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module;
fn write_compressed_metadata<'tcx>(
&self,
Expand All @@ -48,12 +48,13 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
mods: &mut Self::Module,
kind: AllocatorKind,
);
/// This generates the codegen unit and returns it along with
/// a `u64` giving an estimate of the unit's processing cost.
fn compile_codegen_unit(
&self,
tcx: TyCtxt<'_>,
cgu_name: Symbol,
tx_to_llvm_workers: &mpsc::Sender<Box<dyn std::any::Any + Send>>,
);
) -> (ModuleCodegen<Self::Module>, u64);
// If find_features is true this won't access `sess.crate_types` by assuming
// that `is_pie_binary` is false. When we discover LLVM target features
// `sess.crate_types` is uninitialized so we cannot access it.
Expand Down

0 comments on commit 7b741fb

Please sign in to comment.