From 6ef8198406af9ba18787e0fd50ae2e25f78b7e4f Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Fri, 12 Aug 2016 18:22:02 -0400 Subject: [PATCH 01/11] Move `flock.rs` from librustdoc to librustc_data_structures. --- mk/crates.mk | 7 +++---- src/{librustdoc => librustc_data_structures}/flock.rs | 0 src/librustc_data_structures/lib.rs | 4 ++++ src/librustdoc/html/render.rs | 3 ++- src/librustdoc/lib.rs | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) rename src/{librustdoc => librustc_data_structures}/flock.rs (100%) diff --git a/mk/crates.mk b/mk/crates.mk index 5ff6d7a89dbe0..a915d07384f3c 100644 --- a/mk/crates.mk +++ b/mk/crates.mk @@ -112,7 +112,7 @@ DEPS_rustc := syntax fmt_macros flate arena serialize getopts rbml \ rustc_const_math syntax_pos rustc_errors DEPS_rustc_back := std syntax flate log libc DEPS_rustc_borrowck := rustc log graphviz syntax syntax_pos rustc_errors rustc_mir -DEPS_rustc_data_structures := std log serialize +DEPS_rustc_data_structures := std log serialize libc DEPS_rustc_driver := arena flate getopts graphviz libc rustc rustc_back rustc_borrowck \ rustc_typeck rustc_mir rustc_resolve log syntax serialize rustc_llvm \ rustc_trans rustc_privacy rustc_lint rustc_plugin \ @@ -137,9 +137,8 @@ DEPS_rustc_save_analysis := rustc log syntax syntax_pos serialize DEPS_rustc_typeck := rustc syntax syntax_pos rustc_platform_intrinsics rustc_const_math \ rustc_const_eval rustc_errors -DEPS_rustdoc := rustc rustc_driver native:hoedown serialize getopts \ - test rustc_lint rustc_const_eval syntax_pos - +DEPS_rustdoc := rustc rustc_driver native:hoedown serialize getopts test \ + rustc_lint rustc_const_eval syntax_pos rustc_data_structures TOOL_DEPS_compiletest := test getopts log serialize TOOL_DEPS_rustdoc := rustdoc diff --git a/src/librustdoc/flock.rs b/src/librustc_data_structures/flock.rs similarity index 100% rename from src/librustdoc/flock.rs rename to src/librustc_data_structures/flock.rs diff --git 
a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs index 34c3961d5b4c1..4391123559f9b 100644 --- a/src/librustc_data_structures/lib.rs +++ b/src/librustc_data_structures/lib.rs @@ -30,6 +30,7 @@ #![feature(staged_api)] #![feature(unboxed_closures)] #![feature(fn_traits)] +#![feature(libc)] #![cfg_attr(test, feature(test))] @@ -37,6 +38,8 @@ extern crate core; #[macro_use] extern crate log; extern crate serialize as rustc_serialize; // used by deriving +#[cfg(unix)] +extern crate libc; pub mod bitvec; pub mod graph; @@ -51,6 +54,7 @@ pub mod fnv; pub mod tuple_slice; pub mod veccell; pub mod control_flow_graph; +pub mod flock; // See comments in src/librustc/lib.rs #[doc(hidden)] diff --git a/src/librustdoc/html/render.rs b/src/librustdoc/html/render.rs index 5cb5cc051870b..d3b9ca737b990 100644 --- a/src/librustdoc/html/render.rs +++ b/src/librustdoc/html/render.rs @@ -62,6 +62,7 @@ use rustc::middle::stability; use rustc::session::config::get_unstable_features_setting; use rustc::hir; use rustc::util::nodemap::{FnvHashMap, FnvHashSet}; +use rustc_data_structures::flock; use clean::{self, Attributes, GetDefId}; use doctree; @@ -651,7 +652,7 @@ fn write_shared(cx: &Context, // docs placed in the output directory, so this needs to be a synchronized // operation with respect to all other rustdocs running around. try_err!(mkdir(&cx.dst), &cx.dst); - let _lock = ::flock::Lock::new(&cx.dst.join(".lock")); + let _lock = flock::Lock::new(&cx.dst.join(".lock")); // Add all the static files. These may already exist, but we just // overwrite them anyway to make sure that they're fresh and up-to-date. 
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 255e6b1e786df..0e685f063bd7b 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -35,6 +35,7 @@ extern crate libc; extern crate rustc; extern crate rustc_const_eval; extern crate rustc_const_math; +extern crate rustc_data_structures; extern crate rustc_trans; extern crate rustc_driver; extern crate rustc_resolve; @@ -86,7 +87,6 @@ pub mod plugins; pub mod visit_ast; pub mod visit_lib; pub mod test; -mod flock; use clean::Attributes; From 206e7b6fc704c53b2a7174e8bec7b5f575d9bc93 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Mon, 15 Aug 2016 13:52:38 -0400 Subject: [PATCH 02/11] Add some features to flock. --- src/librustc_data_structures/flock.rs | 126 +++++++++++++++++++++----- src/librustdoc/html/render.rs | 2 +- 2 files changed, 106 insertions(+), 22 deletions(-) diff --git a/src/librustc_data_structures/flock.rs b/src/librustc_data_structures/flock.rs index 41bcfdb7cb0f0..adfeaae847537 100644 --- a/src/librustc_data_structures/flock.rs +++ b/src/librustc_data_structures/flock.rs @@ -15,6 +15,7 @@ //! librustdoc, it is not production quality at all. 
#![allow(non_camel_case_types)] +use std::path::Path; pub use self::imp::Lock; @@ -41,6 +42,7 @@ mod imp { pub l_sysid: libc::c_int, } + pub const F_RDLCK: libc::c_short = 0; pub const F_WRLCK: libc::c_short = 1; pub const F_UNLCK: libc::c_short = 2; pub const F_SETLK: libc::c_int = 6; @@ -60,6 +62,7 @@ mod imp { pub l_sysid: libc::c_int, } + pub const F_RDLCK: libc::c_short = 1; pub const F_UNLCK: libc::c_short = 2; pub const F_WRLCK: libc::c_short = 3; pub const F_SETLK: libc::c_int = 12; @@ -84,6 +87,7 @@ mod imp { pub l_sysid: libc::c_int, } + pub const F_RDLCK: libc::c_short = 1; pub const F_UNLCK: libc::c_short = 2; pub const F_WRLCK: libc::c_short = 3; pub const F_SETLK: libc::c_int = 8; @@ -105,6 +109,7 @@ mod imp { pub l_sysid: libc::c_int, } + pub const F_RDLCK: libc::c_short = 1; pub const F_UNLCK: libc::c_short = 2; pub const F_WRLCK: libc::c_short = 3; pub const F_SETLK: libc::c_int = 8; @@ -124,6 +129,7 @@ mod imp { pub l_pid: libc::pid_t, } + pub const F_RDLCK: libc::c_short = 1; pub const F_WRLCK: libc::c_short = 2; pub const F_UNLCK: libc::c_short = 3; pub const F_SETLK: libc::c_int = 6; @@ -135,32 +141,53 @@ mod imp { } impl Lock { - pub fn new(p: &Path) -> Lock { + pub fn new(p: &Path, + wait: bool, + create: bool, + exclusive: bool) + -> io::Result { let os: &OsStr = p.as_ref(); let buf = CString::new(os.as_bytes()).unwrap(); + let open_flags = if create { + libc::O_RDWR | libc::O_CREAT + } else { + libc::O_RDWR + }; + let fd = unsafe { - libc::open(buf.as_ptr(), libc::O_RDWR | libc::O_CREAT, + libc::open(buf.as_ptr(), open_flags, libc::S_IRWXU as libc::c_int) }; - assert!(fd > 0, "failed to open lockfile: {}", - io::Error::last_os_error()); + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + let lock_type = if exclusive { + os::F_WRLCK + } else { + os::F_RDLCK + }; + let flock = os::flock { l_start: 0, l_len: 0, l_pid: 0, l_whence: libc::SEEK_SET as libc::c_short, - l_type: os::F_WRLCK, + l_type: lock_type, l_sysid: 0, }; + let 
cmd = if wait { os::F_SETLKW } else { os::F_SETLK }; let ret = unsafe { - libc::fcntl(fd, os::F_SETLKW, &flock) + libc::fcntl(fd, cmd, &flock) }; if ret == -1 { let err = io::Error::last_os_error(); unsafe { libc::close(fd); } - panic!("could not lock `{}`: {}", p.display(), err); + Err(err) + } else { + Ok(Lock { fd: fd }) } - Lock { fd: fd } } } @@ -191,18 +218,28 @@ mod imp { use std::os::windows::raw::HANDLE; use std::path::Path; use std::fs::{File, OpenOptions}; + use std::os::raw::{c_ulong, c_ulonglong, c_int}; + use std::os::windows::fs::OpenOptionsExt; + + pub type DWORD = c_ulong; + pub type BOOL = c_int; + pub type ULONG_PTR = c_ulonglong; - type DWORD = u32; type LPOVERLAPPED = *mut OVERLAPPED; - type BOOL = i32; const LOCKFILE_EXCLUSIVE_LOCK: DWORD = 0x00000002; + const LOCKFILE_FAIL_IMMEDIATELY: DWORD = 0x00000001; + + pub const FILE_SHARE_DELETE: DWORD = 0x4; + pub const FILE_SHARE_READ: DWORD = 0x1; + pub const FILE_SHARE_WRITE: DWORD = 0x2; #[repr(C)] struct OVERLAPPED { - Internal: usize, - InternalHigh: usize, - Pointer: *mut u8, - hEvent: *mut u8, + Internal: ULONG_PTR, + InternalHigh: ULONG_PTR, + Offset: DWORD, + OffsetHigh: DWORD, + hEvent: HANDLE, } extern "system" { @@ -219,19 +256,66 @@ mod imp { } impl Lock { - pub fn new(p: &Path) -> Lock { - let f = OpenOptions::new().read(true).write(true).create(true) - .open(p).unwrap(); + pub fn new(p: &Path, + wait: bool, + create: bool, + exclusive: bool) + -> io::Result { + + let share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE; + + let f = { + let mut open_options = OpenOptions::new().read(true) + .share_mode(share_mode); + if create { + open_options.create(true); + } + + match open_options.open(p) { + Ok(file) => file, + Err(err) => return Err(err), + } + }; + let ret = unsafe { let mut overlapped: OVERLAPPED = mem::zeroed(); - LockFileEx(f.as_raw_handle(), LOCKFILE_EXCLUSIVE_LOCK, 0, 100, 0, + + let mut dwFlags = 0; + if !wait { + dwFlags |= LOCKFILE_FAIL_IMMEDIATELY; + } + + 
if exclusive { + dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; + } + + LockFileEx(f.as_raw_handle(), + dwFlags, + 0, + 0xFFFF_FFFF, + 0xFFFF_FFFF, &mut overlapped) }; if ret == 0 { - let err = io::Error::last_os_error(); - panic!("could not lock `{}`: {}", p.display(), err); + Err(io::Error::last_os_error()) + } else { + Ok(Lock { _file: f }) } - Lock { _file: f } } } + + // Note that we don't need a Drop impl on the Windows: The file is unlocked + // automatically when it's closed. +} + +impl imp::Lock { + pub fn panicking_new(p: &Path, + wait: bool, + create: bool, + exclusive: bool) + -> Lock { + Lock::new(p, wait, create, exclusive).unwrap_or_else(|err| { + panic!("could not lock `{}`: {}", p.display(), err); + }) + } } diff --git a/src/librustdoc/html/render.rs b/src/librustdoc/html/render.rs index d3b9ca737b990..6d523ff381556 100644 --- a/src/librustdoc/html/render.rs +++ b/src/librustdoc/html/render.rs @@ -652,7 +652,7 @@ fn write_shared(cx: &Context, // docs placed in the output directory, so this needs to be a synchronized // operation with respect to all other rustdocs running around. try_err!(mkdir(&cx.dst), &cx.dst); - let _lock = flock::Lock::new(&cx.dst.join(".lock")); + let _lock = flock::Lock::panicking_new(&cx.dst.join(".lock"), true, true, true); // Add all the static files. These may already exist, but we just // overwrite them anyway to make sure that they're fresh and up-to-date. From 3e9bed92da499d7905232d47d54300134fca13b5 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Thu, 11 Aug 2016 19:02:39 -0400 Subject: [PATCH 03/11] Implement copy-on-write scheme for managing the incremental compilation cache. 
--- src/librustc/hir/svh.rs | 15 + src/librustc/session/mod.rs | 93 +- src/librustc/util/fs.rs | 41 +- src/librustc_data_structures/flock.rs | 2 + src/librustc_driver/driver.rs | 20 +- src/librustc_incremental/lib.rs | 2 + src/librustc_incremental/persist/fs.rs | 895 ++++++++++++++++++ src/librustc_incremental/persist/hash.rs | 54 +- src/librustc_incremental/persist/load.rs | 29 +- src/librustc_incremental/persist/mod.rs | 5 +- src/librustc_incremental/persist/save.rs | 35 +- src/librustc_incremental/persist/util.rs | 95 -- .../persist/work_product.rs | 4 +- src/librustc_trans/back/write.rs | 22 +- 14 files changed, 1162 insertions(+), 150 deletions(-) create mode 100644 src/librustc_incremental/persist/fs.rs delete mode 100644 src/librustc_incremental/persist/util.rs diff --git a/src/librustc/hir/svh.rs b/src/librustc/hir/svh.rs index d4e797c9f2d25..ae1f9d3028c2c 100644 --- a/src/librustc/hir/svh.rs +++ b/src/librustc/hir/svh.rs @@ -17,6 +17,7 @@ use std::fmt; use std::hash::{Hash, Hasher}; +use serialize::{Encodable, Decodable, Encoder, Decoder}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Svh { @@ -51,3 +52,17 @@ impl fmt::Display for Svh { f.pad(&self.to_string()) } } + +impl Encodable for Svh { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_u64(self.as_u64().to_le()) + } +} + +impl Decodable for Svh { + fn decode(d: &mut D) -> Result { + d.read_u64() + .map(u64::from_le) + .map(Svh::new) + } +} diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index c71253aee568f..02ee4571ab5d9 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -33,10 +33,11 @@ use syntax::feature_gate::AttributeType; use syntax_pos::{Span, MultiSpan}; use rustc_back::target::Target; +use rustc_data_structures::flock; use llvm; use std::path::{Path, PathBuf}; -use std::cell::{Cell, RefCell}; +use std::cell::{self, Cell, RefCell}; use std::collections::{HashMap, HashSet}; use std::env; use std::ffi::CString; @@ -101,6 
+102,8 @@ pub struct Session { /// macro name and defintion span in the source crate. pub imported_macro_spans: RefCell>, + incr_comp_session: RefCell, + next_node_id: Cell, } @@ -331,6 +334,70 @@ impl Session { &self.opts.search_paths, kind) } + + pub fn init_incr_comp_session(&self, + session_dir: PathBuf, + lock_file: flock::Lock) { + let mut incr_comp_session = self.incr_comp_session.borrow_mut(); + + if let IncrCompSession::NotInitialized = *incr_comp_session { } else { + bug!("Trying to initialize IncrCompSession `{:?}`", *incr_comp_session) + } + + *incr_comp_session = IncrCompSession::Active { + session_directory: session_dir, + lock_file: lock_file, + }; + } + + pub fn finalize_incr_comp_session(&self, new_directory_path: PathBuf) { + let mut incr_comp_session = self.incr_comp_session.borrow_mut(); + + if let IncrCompSession::Active { .. } = *incr_comp_session { } else { + bug!("Trying to finalize IncrCompSession `{:?}`", *incr_comp_session) + } + + // Note: This will also drop the lock file, thus unlocking the directory + *incr_comp_session = IncrCompSession::Finalized { + session_directory: new_directory_path, + }; + } + + pub fn mark_incr_comp_session_as_invalid(&self) { + let mut incr_comp_session = self.incr_comp_session.borrow_mut(); + + if let IncrCompSession::Active { .. } = *incr_comp_session { } else { + bug!("Trying to invalidate IncrCompSession `{:?}`", *incr_comp_session) + } + + // Note: This will also drop the lock file, thus unlocking the directory + *incr_comp_session = IncrCompSession::InvalidBecauseOfErrors; + } + + pub fn incr_comp_session_dir(&self) -> cell::Ref { + let incr_comp_session = self.incr_comp_session.borrow(); + cell::Ref::map(incr_comp_session, |incr_comp_session| { + match *incr_comp_session { + IncrCompSession::NotInitialized | + IncrCompSession::InvalidBecauseOfErrors => { + bug!("Trying to get session directory from IncrCompSession `{:?}`", + *incr_comp_session) + } + IncrCompSession::Active { ref session_directory, .. 
} | + IncrCompSession::Finalized { ref session_directory } => { + session_directory + } + } + }) + } + + pub fn incr_comp_session_dir_opt(&self) -> Option> { + if self.opts.incremental.is_some() { + Some(self.incr_comp_session_dir()) + } else { + None + } + } } pub fn build_session(sopts: config::Options, @@ -446,6 +513,7 @@ pub fn build_session_(sopts: config::Options, injected_panic_runtime: Cell::new(None), available_macros: RefCell::new(HashSet::new()), imported_macro_spans: RefCell::new(HashMap::new()), + incr_comp_session: RefCell::new(IncrCompSession::NotInitialized), }; init_llvm(&sess); @@ -453,6 +521,29 @@ pub fn build_session_(sopts: config::Options, sess } +/// Holds data on the current incremental compilation session, if there is one. +#[derive(Debug)] +pub enum IncrCompSession { + // This is the state the session will be in until the incr. comp. dir is + // needed. + NotInitialized, + // This is the state during which the session directory is private and can + // be modified. + Active { + session_directory: PathBuf, + lock_file: flock::Lock, + }, + // This is the state after the session directory has been finalized. In this + // state, the contents of the directory must not be modified any more. + Finalized { + session_directory: PathBuf, + }, + // This is an error state that is reached when some compilation error has + // occurred. It indicates that the contents of the session directory must + // not be used, since they might be invalid. + InvalidBecauseOfErrors, +} + fn init_llvm(sess: &Session) { unsafe { // Before we touch LLVM, make sure that multithreading is enabled. diff --git a/src/librustc/util/fs.rs b/src/librustc/util/fs.rs index f4e1c06090e59..d7800ccaa5dd3 100644 --- a/src/librustc/util/fs.rs +++ b/src/librustc/util/fs.rs @@ -56,14 +56,49 @@ pub fn fix_windows_verbatim_for_gcc(p: &Path) -> PathBuf { } } +pub enum LinkOrCopy { + Link, + Copy +} + /// Copy `p` into `q`, preferring to use hard-linking if possible. 
If /// `q` already exists, it is removed first. -pub fn link_or_copy, Q: AsRef>(p: P, q: Q) -> io::Result<()> { +/// The result indicates which of the two operations has been performed. +pub fn link_or_copy, Q: AsRef>(p: P, q: Q) -> io::Result { let p = p.as_ref(); let q = q.as_ref(); if q.exists() { try!(fs::remove_file(&q)); } - fs::hard_link(p, q) - .or_else(|_| fs::copy(p, q).map(|_| ())) + + match fs::hard_link(p, q) { + Ok(()) => Ok(LinkOrCopy::Link), + Err(_) => { + match fs::copy(p, q) { + Ok(_) => Ok(LinkOrCopy::Copy), + Err(e) => Err(e) + } + } + } +} + +// Like std::fs::create_dir_all, except handles concurrent calls among multiple +// threads or processes. +pub fn create_dir_racy(path: &Path) -> io::Result<()> { + match fs::create_dir(path) { + Ok(()) => return Ok(()), + Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => return Ok(()), + Err(ref e) if e.kind() == io::ErrorKind::NotFound => {} + Err(e) => return Err(e), + } + match path.parent() { + Some(p) => try!(create_dir_racy(p)), + None => return Err(io::Error::new(io::ErrorKind::Other, + "failed to create whole tree")), + } + match fs::create_dir(path) { + Ok(()) => Ok(()), + Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => Ok(()), + Err(e) => Err(e), + } } diff --git a/src/librustc_data_structures/flock.rs b/src/librustc_data_structures/flock.rs index adfeaae847537..22f8d76399519 100644 --- a/src/librustc_data_structures/flock.rs +++ b/src/librustc_data_structures/flock.rs @@ -136,6 +136,7 @@ mod imp { pub const F_SETLKW: libc::c_int = 7; } + #[derive(Debug)] pub struct Lock { fd: libc::c_int, } @@ -251,6 +252,7 @@ mod imp { lpOverlapped: LPOVERLAPPED) -> BOOL; } + #[derive(Debug)] pub struct Lock { _file: File, } diff --git a/src/librustc_driver/driver.rs b/src/librustc_driver/driver.rs index 3f2f6c84da190..33d68523a0f94 100644 --- a/src/librustc_driver/driver.rs +++ b/src/librustc_driver/driver.rs @@ -88,7 +88,7 @@ pub fn compile_input(sess: &Session, // We need nested scopes 
here, because the intermediate results can keep // large chunks of memory alive and we want to free them as soon as // possible to keep the peak memory usage low - let (outputs, trans, crate_name) = { + let (outputs, trans) = { let krate = match phase_1_parse_input(sess, cfg, input) { Ok(krate) => krate, Err(mut parse_error) => { @@ -213,11 +213,11 @@ pub fn compile_input(sess: &Session, // Discard interned strings as they are no longer required. token::clear_ident_interner(); - Ok((outputs, trans, crate_name.clone())) + Ok((outputs, trans)) })?? }; - let phase5_result = phase_5_run_llvm_passes(sess, &crate_name, &trans, &outputs); + let phase5_result = phase_5_run_llvm_passes(sess, &trans, &outputs); controller_entry_point!(after_llvm, sess, @@ -229,6 +229,10 @@ pub fn compile_input(sess: &Session, phase_6_link_output(sess, &trans, &outputs); + // Now that we won't touch anything in the incremental compilation directory + // any more, we can finalize it (which involves renaming it) + rustc_incremental::finalize_session_directory(sess, trans.link.crate_hash); + controller_entry_point!(compilation_done, sess, CompileState::state_when_compilation_done(input, sess, outdir, output), @@ -1026,19 +1030,19 @@ pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, time(time_passes, "assert dep graph", - move || rustc_incremental::assert_dep_graph(tcx)); + || rustc_incremental::assert_dep_graph(tcx)); time(time_passes, "serialize dep graph", - move || rustc_incremental::save_dep_graph(tcx, &incremental_hashes_map)); - + || rustc_incremental::save_dep_graph(tcx, + &incremental_hashes_map, + translation.link.crate_hash)); translation } /// Run LLVM itself, producing a bitcode file, assembly file or object file /// as a side effect. 
pub fn phase_5_run_llvm_passes(sess: &Session, - crate_name: &str, trans: &trans::CrateTranslation, outputs: &OutputFilenames) -> CompileResult { if sess.opts.cg.no_integrated_as { @@ -1061,7 +1065,7 @@ pub fn phase_5_run_llvm_passes(sess: &Session, time(sess.time_passes(), "serialize work products", - move || rustc_incremental::save_work_products(sess, crate_name)); + move || rustc_incremental::save_work_products(sess)); if sess.err_count() > 0 { Err(sess.err_count()) diff --git a/src/librustc_incremental/lib.rs b/src/librustc_incremental/lib.rs index d31d97b22cf4f..511ba8ec19cc7 100644 --- a/src/librustc_incremental/lib.rs +++ b/src/librustc_incremental/lib.rs @@ -22,6 +22,7 @@ #![feature(question_mark)] #![feature(rustc_private)] #![feature(staged_api)] +#![feature(rand)] extern crate graphviz; extern crate rbml; @@ -45,3 +46,4 @@ pub use persist::save_dep_graph; pub use persist::save_trans_partition; pub use persist::save_work_products; pub use persist::in_incr_comp_dir; +pub use persist::finalize_session_directory; diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs new file mode 100644 index 0000000000000..c2990c66020b5 --- /dev/null +++ b/src/librustc_incremental/persist/fs.rs @@ -0,0 +1,895 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +//! This module manages how the incremental compilation cache is represented in +//! the file system. +//! +//! Incremental compilation caches are managed according to a copy-on-write +//! strategy: Once a complete, consistent cache version is finalized, it is +//! never modified. Instead, when a subsequent compilation session is started, +//! 
the compiler will allocate a new version of the cache that starts out as +//! a copy of the previous version. Then only this new copy is modified and it +//! will not be visible to other processes until it is finalized. This ensures +//! that multiple compiler processes can be executed concurrently for the same +//! crate without interfering with each other or blocking each other. +//! +//! More concretely this is implemented via the following protocol: +//! +//! 1. For a newly started compilation session, the compiler allocates a +//! new `session` directory within the incremental compilation directory. +//! This session directory will have a unique name that ends with the suffix +//! "-working" and that contains a creation timestamp. +//! 2. Next, the compiler looks for the newest finalized session directory, +//! that is, a session directory from a previous compilation session that +//! has been marked as valid and consistent. A session directory is +//! considered finalized if the "-working" suffix in the directory name has +//! been replaced by the SVH of the crate. +//! 3. Once the compiler has found a valid, finalized session directory, it will +//! hard-link/copy its contents into the new "-working" directory. If all +//! goes well, it will have its own, private copy of the source directory and +//! subsequently not have to worry about synchronizing with other compiler +//! processes. +//! 4. Now the compiler can do its normal compilation process, which involves +//! reading and updating its private session directory. +//! 5. When compilation finishes without errors, the private session directory +//! will be in a state where it can be used as input for other compilation +//! sessions. That is, it will contain a dependency graph and cache artifacts +//! that are consistent with the state of the source code it was compiled +//! from, with no need to change them ever again. At this point, the compiler +//! 
finalizes and "publishes" its private session directory by renaming it +//! from "sess-{timestamp}-{random}-working" to "sess-{timestamp}-{SVH}". +//! 6. At this point the "old" session directory that we copied our data from +//! at the beginning of the session has become obsolete because we have just +//! published a more current version. Thus the compiler will delete it. +//! +//! ## Garbage Collection +//! +//! Naively following the above protocol might lead to old session directories +//! piling up if a compiler instance crashes for some reason before its able to +//! remove its private session directory. In order to avoid wasting disk space, +//! the compiler also does some garbage collection each time it is started in +//! incremental compilation mode. Specifically, it will scan the incremental +//! compilation directory for private session directories that are not in use +//! any more and will delete those. It will also delete any finalized session +//! directories for a given crate except for the most recent one. +//! +//! ## Synchronization +//! +//! There is some synchronization needed in order for the compiler to be able to +//! determine whether a given private session directory is not in used any more. +//! This is done by creating a lock file within each session directory and +//! locking it while the directory is still being used. Since file locks have +//! operating system support, we can rely on the lock being released if the +//! compiler process dies for some unexpected reason. Thus, when garbage +//! collecting private session directories, the collecting process can determine +//! whether the directory is still in use by trying to acquire a lock on the +//! file. If locking the file fails, the original process must still be alive. +//! If locking the file succeeds, we know that the owning process is not alive +//! any more and we can safely delete the directory. +//! 
There is still a small time window between the original process creating the +//! lock file and actually locking it. In order to minimize the chance that +//! another process tries to acquire the lock in just that instance, only +//! session directories that are older than a few seconds are considered for +//! garbage collection. +//! +//! Another case that has to be considered is what happens if one process +//! deletes a finalized session directory that another process is currently +//! trying to copy from. This case is also handled via the lock file. Before +//! a process starts copying a finalized session directory, it will acquire a +//! shared lock on the directory's lock file. Any garbage collecting process, +//! on the other hand, will acquire an exclusive lock on the lock file. +//! Thus, if a directory is being collected, any reader process will fail +//! acquiring the shared lock and will leave the directory alone. Conversely, +//! if a collecting process can't acquire the exclusive lock because the +//! directory is currently being read from, it will leave collecting that +//! directory to another process at a later point in time. +//! The exact same scheme is also used when reading the metadata hashes file +//! from an extern crate. When a crate is compiled, the hash values of its +//! metadata are stored in a file in its session directory. When the +//! compilation session of another crate imports the first crate's metadata, +//! it also has to read in the accompanying metadata hashes. It thus will access +//! the finalized session directory of all crates it links to and while doing +//! so, it will also place a read lock on that the respective session directory +//! so that it won't be deleted while the metadata hashes are loaded. +//! +//! ## Preconditions +//! +//! This system relies on two features being available in the file system in +//! order to work really well: file locking and hard linking. +//! 
If hard linking is not available (like on FAT) the data in the cache +//! actually has to be copied at the beginning of each session. +//! If file locking does not work reliably (like on NFS), some of the +//! synchronization will go haywire. +//! In both cases we recommend to locate the incremental compilation directory +//! on a file system that supports these things. +//! It might be a good idea though to try and detect whether we are on an +//! unsupported file system and emit a warning in that case. This is not yet +//! implemented. + +use rustc::hir::svh::Svh; +use rustc::middle::cstore::LOCAL_CRATE; +use rustc::session::Session; +use rustc::ty::TyCtxt; +use rustc::util::fs as fs_util; +use rustc_data_structures::flock; +use rustc_data_structures::fnv::{FnvHashSet, FnvHashMap}; + +use std::ffi::OsString; +use std::fs as std_fs; +use std::io; +use std::mem; +use std::path::{Path, PathBuf}; +use std::time::{UNIX_EPOCH, SystemTime, Duration}; +use std::__rand::{thread_rng, Rng}; +use syntax::ast; + +const LOCK_FILE_NAME: &'static str = ".lock_file"; +const DEP_GRAPH_FILENAME: &'static str = "dep-graph.bin"; +const WORK_PRODUCTS_FILENAME: &'static str = "work-products.bin"; +const METADATA_HASHES_FILENAME: &'static str = "metadata.bin"; + +pub fn dep_graph_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) +} + +pub fn work_products_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME) +} + +pub fn metadata_hash_export_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, METADATA_HASHES_FILENAME) +} + +pub fn metadata_hash_import_path(import_session_dir: &Path) -> PathBuf { + import_session_dir.join(METADATA_HASHES_FILENAME) +} + +pub fn lock_file_path(session_dir: &Path) -> PathBuf { + session_dir.join(LOCK_FILE_NAME) +} + +pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf { + in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name) +} + +pub fn 
in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf { + incr_comp_session_dir.join(file_name) +} + +/// Allocates the private session directory. The boolean in the Ok() result +/// indicates whether we should try loading a dep graph from the successfully +/// initialized directory, or not. +/// The post-condition of this fn is that we have a valid incremental +/// compilation session directory, if the result is `Ok`. A valid session +/// directory is one that contains a locked lock file. It may or may not contain +/// a dep-graph and work products from a previous session. +/// If the call fails, the fn may leave behind an invalid session directory. +/// The garbage collection will take care of it. +pub fn prepare_session_directory(tcx: TyCtxt) -> Result { + debug!("prepare_session_directory"); + + // {incr-comp-dir}/{crate-name-and-disambiguator} + let crate_dir = crate_path_tcx(tcx, LOCAL_CRATE); + debug!("crate-dir: {}", crate_dir.display()); + + let mut source_directories_already_tried = FnvHashSet(); + + loop { + // Allocate a session directory of the form: + // + // {incr-comp-dir}/{crate-name-and-disambiguator}/sess-{timestamp}-{random}-working + // + // If this fails, return an error, don't retry + let session_dir = try!(alloc_session_dir(tcx.sess, &crate_dir)); + debug!("session-dir: {}", session_dir.display()); + + // Lock the newly created session directory. If this fails, return an + // error without retrying + let directory_lock = try!(lock_directory(tcx.sess, &session_dir)); + + let print_file_copy_stats = tcx.sess.opts.debugging_opts.incremental_info; + + // Find a suitable source directory to copy from. Ignore those that we + // have already tried before. + let source_directory = find_source_directory(&crate_dir, + &source_directories_already_tried); + + let source_directory = if let Some(dir) = source_directory { + dir + } else { + // There's nowhere to copy from, we're done + debug!("no source directory found. 
Continuing with empty session \ + directory."); + + tcx.sess.init_incr_comp_session(session_dir, directory_lock); + return Ok(false) + }; + + debug!("attempting to copy data from source: {}", + source_directory.display()); + + // Try copying over all files from the source directory + if copy_files(&session_dir, &source_directory, print_file_copy_stats).is_ok() { + debug!("successfully copied data from: {}", + source_directory.display()); + + tcx.sess.init_incr_comp_session(session_dir, directory_lock); + return Ok(true) + } else { + debug!("copying failed - trying next directory"); + + // Something went wrong while trying to copy/link files from the + // source directory. Try again with a different one. + source_directories_already_tried.insert(source_directory); + + // Try to remove the session directory we just allocated. We don't + // know if there's any garbage in it from the failed copy action. + if let Err(err) = std_fs::remove_dir_all(&session_dir) { + debug!("Failed to delete partly initialized session dir `{}`: {}", + session_dir.display(), + err); + } + mem::drop(directory_lock); + } + } +} + +/// This function finalizes and thus 'publishes' the session directory by +/// renaming it to `sess-{timestamp}-{svh}` and releasing the file lock. +/// If there have been compilation errors, however, this function will just +/// delete the presumably invalid session directory. +pub fn finalize_session_directory(sess: &Session, svh: Svh) { + if sess.opts.incremental.is_none() { + return; + } + + let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone(); + + if sess.has_errors() { + // If there have been any errors during compilation, we don't want to + // publish this session directory. Rather, we'll just delete it. 
+ + debug!("finalize_session_directory() - invalidating session directory: {}", + incr_comp_session_dir.display()); + + if let Err(err) = std_fs::remove_dir_all(&*incr_comp_session_dir) { + sess.warn(&format!("Error deleting incremental compilation \ + session directory `{}`: {}", + incr_comp_session_dir.display(), + err)); + } + sess.mark_incr_comp_session_as_invalid(); + } + + debug!("finalize_session_directory() - session directory: {}", + incr_comp_session_dir.display()); + + let old_sub_dir_name = incr_comp_session_dir.file_name() + .unwrap() + .to_string_lossy(); + assert_no_characters_lost(&old_sub_dir_name); + + // Keep the 'sess-{timestamp}' prefix, but replace the + // '-{random-number}-working' part with the SVH of the crate + let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-") + .map(|(idx, _)| idx) + .collect(); + if dash_indices.len() != 3 { + bug!("Encountered incremental compilation session directory with \ + malformed name: {}", + incr_comp_session_dir.display()) + } + + // State: "sess-{timestamp}-" + let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[1] + 1]); + + // Append the svh + new_sub_dir_name.push_str(&svh.to_string()); + + // Create the full path + let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); + debug!("finalize_session_directory() - new path: {}", new_path.display()); + + match std_fs::rename(&*incr_comp_session_dir, &new_path) { + Ok(_) => { + debug!("finalize_session_directory() - directory renamed successfully"); + + // This unlocks the directory + sess.finalize_incr_comp_session(new_path); + } + Err(e) => { + // Warn about the error. However, no need to abort compilation now. 
+ sess.warn(&format!("Error finalizing incremental compilation \ + session directory `{}`: {}", + incr_comp_session_dir.display(), + e)); + + debug!("finalize_session_directory() - error, marking as invalid"); + // Drop the file lock, so we can garage collect + sess.mark_incr_comp_session_as_invalid(); + } + } + + let _ = garbage_collect_session_directories(sess); +} + +fn copy_files(target_dir: &Path, + source_dir: &Path, + print_stats_on_success: bool) + -> Result<(), ()> { + // We acquire a shared lock on the lock file of the directory, so that + // nobody deletes it out from under us while we are reading from it. + let lock_file_path = source_dir.join(LOCK_FILE_NAME); + let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path, + false, // don't wait, + false, // don't create + false) { // not exclusive + lock + } else { + // Could not acquire the lock, don't try to copy from here + return Err(()) + }; + + let source_dir_iterator = match source_dir.read_dir() { + Ok(it) => it, + Err(_) => return Err(()) + }; + + let mut files_linked = 0; + let mut files_copied = 0; + + for entry in source_dir_iterator { + match entry { + Ok(entry) => { + let file_name = entry.file_name(); + + if file_name.to_string_lossy() == LOCK_FILE_NAME { + continue; + } + + let target_file_path = target_dir.join(file_name); + let source_path = entry.path(); + + debug!("copying into session dir: {}", source_path.display()); + match fs_util::link_or_copy(source_path, target_file_path) { + Ok(fs_util::LinkOrCopy::Link) => { + files_linked += 1 + } + Ok(fs_util::LinkOrCopy::Copy) => { + files_copied += 1 + } + Err(_) => return Err(()) + } + } + Err(_) => { + return Err(()) + } + } + } + + if print_stats_on_success { + println!("incr. comp. session directory: {} files hard-linked", files_linked); + println!("incr. comp. 
session directory: {} files copied", files_copied); + } + + Ok(()) +} + +/// Create a directory with a path of the form: +/// {crate_dir}/sess-{timestamp}-{random-number}-working +fn alloc_session_dir(sess: &Session, + crate_dir: &Path) + -> Result { + let timestamp = timestamp_to_string(SystemTime::now()); + debug!("alloc_session_dir: timestamp = {}", timestamp); + let random_number = thread_rng().next_u32(); + debug!("alloc_session_dir: random_number = {}", random_number); + + let directory_name = format!("sess-{}-{:x}-working", timestamp, random_number); + debug!("alloc_session_dir: directory_name = {}", directory_name); + let directory_path = crate_dir.join(directory_name); + debug!("alloc_session_dir: directory_path = {}", directory_path.display()); + + match fs_util::create_dir_racy(&directory_path) { + Ok(()) => { + debug!("alloc_session_dir: directory created successfully"); + Ok(directory_path) + } + Err(err) => { + sess.err(&format!("incremental compilation: could not create \ + session directory `{}`: {}", + directory_path.display(), + err)); + Err(()) + } + } +} + +/// Allocate a the lock-file and lock it. +fn lock_directory(sess: &Session, + session_dir: &Path) + -> Result { + let lock_file_path = session_dir.join(LOCK_FILE_NAME); + debug!("lock_directory() - lock_file: {}", lock_file_path.display()); + + match flock::Lock::new(&lock_file_path, + false, // don't wait + true, // create the lock file + true) { // the lock should be exclusive + Ok(lock) => Ok(lock), + Err(err) => { + sess.err(&format!("incremental compilation: could not create \ + session directory lock file: {}", err)); + Err(()) + } + } +} + +/// Find the most recent published session directory that is not in the +/// ignore-list. 
+fn find_source_directory(crate_dir: &Path, + source_directories_already_tried: &FnvHashSet) + -> Option { + let iter = crate_dir.read_dir() + .unwrap() // FIXME + .filter_map(|e| e.ok().map(|e| e.path())); + + find_source_directory_in_iter(iter, source_directories_already_tried) +} + +fn find_source_directory_in_iter(iter: I, + source_directories_already_tried: &FnvHashSet) + -> Option + where I: Iterator +{ + let mut best_candidate = (UNIX_EPOCH, None); + + for session_dir in iter { + if source_directories_already_tried.contains(&session_dir) || + !is_finalized(&session_dir.to_string_lossy()) { + continue + } + + let timestamp = { + let directory_name = session_dir.file_name().unwrap().to_string_lossy(); + assert_no_characters_lost(&directory_name); + + extract_timestamp_from_session_dir(&directory_name) + .unwrap_or_else(|_| { + bug!("unexpected incr-comp session dir: {}", session_dir.display()) + }) + }; + + if timestamp > best_candidate.0 { + best_candidate = (timestamp, Some(session_dir)); + } + } + + best_candidate.1 +} + +fn is_finalized(directory_name: &str) -> bool { + !directory_name.ends_with("-working") +} + +fn is_session_directory(directory_name: &str) -> bool { + directory_name.starts_with("sess-") +} + +fn extract_timestamp_from_session_dir(directory_name: &str) + -> Result { + if !is_session_directory(directory_name) { + return Err(()) + } + + let dash_indices: Vec<_> = directory_name.match_indices("-") + .map(|(idx, _)| idx) + .collect(); + if dash_indices.len() < 2 { + return Err(()) + } + + string_to_timestamp(&directory_name[dash_indices[0]+1 .. 
dash_indices[1]]) +} + +fn timestamp_to_string(timestamp: SystemTime) -> String { + let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); + let nanos = duration.as_secs() * 1_000_000_000 + + (duration.subsec_nanos() as u64); + format!("{:x}", nanos) +} + +fn string_to_timestamp(s: &str) -> Result { + let nanos_since_unix_epoch = u64::from_str_radix(s, 16); + + if nanos_since_unix_epoch.is_err() { + return Err(()) + } + + let nanos_since_unix_epoch = nanos_since_unix_epoch.unwrap(); + + let duration = Duration::new(nanos_since_unix_epoch / 1_000_000_000, + (nanos_since_unix_epoch % 1_000_000_000) as u32); + Ok(UNIX_EPOCH + duration) +} + +fn crate_path_tcx(tcx: TyCtxt, cnum: ast::CrateNum) -> PathBuf { + crate_path(tcx.sess, &tcx.crate_name(cnum), &tcx.crate_disambiguator(cnum)) +} + +/// Finds the session directory containing the correct metadata hashes file for +/// the given crate. In order to do that it has to compute the crate directory +/// of the given crate, and in there, look for the session directory with the +/// correct SVH in it. +/// Note that we have to match on the exact SVH here, not just the +/// crate's (name, disambiguator) pair. The metadata hashes are only valid for +/// the exact version of the binary we are reading from now (i.e. the hashes +/// are part of the dependency graph of a specific compilation session). 
+pub fn find_metadata_hashes_for(tcx: TyCtxt, cnum: ast::CrateNum) -> Option { + let crate_directory = crate_path_tcx(tcx, cnum); + + if !crate_directory.exists() { + return None + } + + let dir_entries = match crate_directory.read_dir() { + Ok(dir_entries) => dir_entries, + Err(e) => { + tcx.sess + .err(&format!("incremental compilation: Could not read crate directory `{}`: {}", + crate_directory.display(), e)); + return None + } + }; + + let target_svh = tcx.sess.cstore.crate_hash(cnum).to_string(); + + let sub_dir = find_metadata_hashes_iter(&target_svh, dir_entries.filter_map(|e| { + e.ok().map(|e| e.file_name().to_string_lossy().into_owned()) + })); + + sub_dir.map(|sub_dir_name| crate_directory.join(&sub_dir_name)) +} + +fn find_metadata_hashes_iter<'a, I>(target_svh: &str, iter: I) -> Option + where I: Iterator +{ + for sub_dir_name in iter { + if !is_session_directory(&sub_dir_name) || !is_finalized(&sub_dir_name) { + // This is not a usable session directory + continue + } + + let is_match = if let Some(last_dash_pos) = sub_dir_name.rfind("-") { + let candidate_svh = &sub_dir_name[last_dash_pos + 1 .. ]; + target_svh == candidate_svh + } else { + // some kind of invalid directory name + continue + }; + + if is_match { + return Some(OsString::from(sub_dir_name)) + } + } + + None +} + +fn crate_path(sess: &Session, + crate_name: &str, + crate_disambiguator: &str) + -> PathBuf { + use std::hash::{SipHasher, Hasher, Hash}; + + let incr_dir = sess.opts.incremental.as_ref().unwrap().clone(); + + // The full crate disambiguator is really long. A hash of it should be + // sufficient. 
+ let mut hasher = SipHasher::new(); + crate_disambiguator.hash(&mut hasher); + + let crate_name = format!("{}-{:x}", crate_name, hasher.finish()); + incr_dir.join(crate_name) +} + +fn assert_no_characters_lost(s: &str) { + if s.contains('\u{FFFD}') { + bug!("Could not losslessly convert '{}'.", s) + } +} + +pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { + debug!("garbage_collect_session_directories() - begin"); + + let session_directory = sess.incr_comp_session_dir(); + debug!("garbage_collect_session_directories() - session directory: {}", + session_directory.display()); + + let crate_directory = session_directory.parent().unwrap(); + debug!("garbage_collect_session_directories() - crate directory: {}", + crate_directory.display()); + + let mut deletion_candidates = vec![]; + let mut definitely_delete = vec![]; + + for dir_entry in try!(crate_directory.read_dir()) { + let dir_entry = match dir_entry { + Ok(dir_entry) => dir_entry, + _ => { + // Ignore any errors + continue + } + }; + + let directory_name = dir_entry.file_name(); + let directory_name = directory_name.to_string_lossy(); + + if !is_session_directory(&directory_name) { + // This is something we don't know, leave it alone... + continue + } + assert_no_characters_lost(&directory_name); + + if let Ok(file_type) = dir_entry.file_type() { + if !file_type.is_dir() { + // This is not a directory, skip it + continue + } + } else { + // Some error occurred while trying to determine the file type, + // skip it + continue + } + + debug!("garbage_collect_session_directories() - inspecting: {}", + directory_name); + + match extract_timestamp_from_session_dir(&directory_name) { + Ok(timestamp) => { + let lock_file_path = crate_directory.join(&*directory_name) + .join(LOCK_FILE_NAME); + + if !is_finalized(&directory_name) { + let ten_seconds = Duration::from_secs(10); + + // When cleaning out "-working" session directories, i.e. 
+ // session directories that might still be in use by another + // compiler instance, we only look a directories that are + // at least ten seconds old. This is supposed to reduce the + // chance of deleting a directory in the time window where + // the process has allocated the directory but has not yet + // acquired the file-lock on it. + if timestamp < SystemTime::now() - ten_seconds { + debug!("garbage_collect_session_directories() - \ + attempting to collect"); + + // Try to acquire the directory lock. If we can't, it + // means that the owning process is still alive and we + // leave this directory alone. + match flock::Lock::new(&lock_file_path, + false, // don't wait + false, // don't create the lock-file + true) { // get an exclusive lock + Ok(lock) => { + debug!("garbage_collect_session_directories() - \ + successfully acquired lock"); + + // Note that we are holding on to the lock + definitely_delete.push((dir_entry.path(), + Some(lock))); + } + Err(_) => { + debug!("garbage_collect_session_directories() - \ + not collecting, still in use"); + } + } + } else { + debug!("garbage_collect_session_directories() - \ + private session directory too new"); + } + } else { + match flock::Lock::new(&lock_file_path, + false, // don't wait + false, // don't create the lock-file + true) { // get an exclusive lock + Ok(lock) => { + debug!("garbage_collect_session_directories() - \ + successfully acquired lock"); + debug!("garbage_collect_session_directories() - adding \ + deletion candidate: {}", directory_name); + + // Note that we are holding on to the lock + deletion_candidates.push((timestamp, + dir_entry.path(), + Some(lock))); + } + Err(_) => { + debug!("garbage_collect_session_directories() - \ + not collecting, still in use"); + } + } + } + } + Err(_) => { + // Malformed timestamp in directory, delete it + definitely_delete.push((dir_entry.path(), None)); + + debug!("garbage_collect_session_directories() - encountered \ + malformed session directory: {}", 
directory_name); + } + } + } + + // Delete all but the most recent of the candidates + for (path, lock) in all_except_most_recent(deletion_candidates) { + debug!("garbage_collect_session_directories() - deleting `{}`", + path.display()); + + if let Err(err) = std_fs::remove_dir_all(&path) { + sess.warn(&format!("Failed to garbage collect finalized incremental \ + compilation session directory `{}`: {}", + path.display(), + err)); + } + + // Let's make it explicit that the file lock is released at this point, + // or rather, that we held on to it until here + mem::drop(lock); + } + + for (path, lock) in definitely_delete { + debug!("garbage_collect_session_directories() - deleting `{}`", + path.display()); + + if let Err(err) = std_fs::remove_dir_all(&path) { + sess.warn(&format!("Failed to garbage collect incremental \ + compilation session directory `{}`: {}", + path.display(), + err)); + } + + // Let's make it explicit that the file lock is released at this point, + // or rather, that we held on to it until here + mem::drop(lock); + } + + Ok(()) +} + +fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option)>) + -> FnvHashMap> { + let most_recent = deletion_candidates.iter() + .map(|&(timestamp, _, _)| timestamp) + .max(); + + if let Some(most_recent) = most_recent { + deletion_candidates.into_iter() + .filter(|&(timestamp, _, _)| timestamp != most_recent) + .map(|(_, path, lock)| (path, lock)) + .collect() + } else { + FnvHashMap() + } +} + +#[test] +fn test_all_except_most_recent() { + assert_eq!(all_except_most_recent( + vec![ + (UNIX_EPOCH + Duration::new(4, 0), PathBuf::from("4"), None), + (UNIX_EPOCH + Duration::new(1, 0), PathBuf::from("1"), None), + (UNIX_EPOCH + Duration::new(5, 0), PathBuf::from("5"), None), + (UNIX_EPOCH + Duration::new(3, 0), PathBuf::from("3"), None), + (UNIX_EPOCH + Duration::new(2, 0), PathBuf::from("2"), None), + ]).keys().cloned().collect::>(), + vec![ + PathBuf::from("1"), + PathBuf::from("2"), + 
PathBuf::from("3"), + PathBuf::from("4"), + ].into_iter().collect::>() + ); + + assert_eq!(all_except_most_recent( + vec![ + ]).keys().cloned().collect::>(), + FnvHashSet() + ); +} + +#[test] +fn test_timestamp_serialization() { + for i in 0 .. 1_000u64 { + let time = UNIX_EPOCH + Duration::new(i * 3_434_578, (i as u32) * 239_676); + let s = timestamp_to_string(time); + assert_eq!(time, string_to_timestamp(&s).unwrap()); + } +} + +#[test] +fn test_find_source_directory_in_iter() { + let already_visited = FnvHashSet(); + + // Find newest + assert_eq!(find_source_directory_in_iter( + vec![PathBuf::from("./sess-3234-0000"), + PathBuf::from("./sess-2234-0000"), + PathBuf::from("./sess-1234-0000")].into_iter(), &already_visited), + Some(PathBuf::from("./sess-3234-0000"))); + + // Filter out "-working" + assert_eq!(find_source_directory_in_iter( + vec![PathBuf::from("./sess-3234-0000-working"), + PathBuf::from("./sess-2234-0000"), + PathBuf::from("./sess-1234-0000")].into_iter(), &already_visited), + Some(PathBuf::from("./sess-2234-0000"))); + + // Handle empty + assert_eq!(find_source_directory_in_iter(vec![].into_iter(), &already_visited), + None); + + // Handle only working + assert_eq!(find_source_directory_in_iter( + vec![PathBuf::from("./sess-3234-0000-working"), + PathBuf::from("./sess-2234-0000-working"), + PathBuf::from("./sess-1234-0000-working")].into_iter(), &already_visited), + None); +} + +#[test] +fn test_find_metadata_hashes_iter() +{ + assert_eq!(find_metadata_hashes_iter("testsvh2", + vec![ + String::from("sess-timestamp1-testsvh1"), + String::from("sess-timestamp2-testsvh2"), + String::from("sess-timestamp3-testsvh3"), + ].into_iter()), + Some(OsString::from("sess-timestamp2-testsvh2")) + ); + + assert_eq!(find_metadata_hashes_iter("testsvh2", + vec![ + String::from("sess-timestamp1-testsvh1"), + String::from("sess-timestamp2-testsvh2"), + String::from("invalid-name"), + ].into_iter()), + Some(OsString::from("sess-timestamp2-testsvh2")) + ); + + 
assert_eq!(find_metadata_hashes_iter("testsvh2", + vec![ + String::from("sess-timestamp1-testsvh1"), + String::from("sess-timestamp2-testsvh2-working"), + String::from("sess-timestamp3-testsvh3"), + ].into_iter()), + None + ); + + assert_eq!(find_metadata_hashes_iter("testsvh1", + vec![ + String::from("sess-timestamp1-random1-working"), + String::from("sess-timestamp2-random2-working"), + String::from("sess-timestamp3-random3-working"), + ].into_iter()), + None + ); + + assert_eq!(find_metadata_hashes_iter("testsvh2", + vec![ + String::from("timestamp1-testsvh2"), + String::from("timestamp2-testsvh2"), + String::from("timestamp3-testsvh2"), + ].into_iter()), + None + ); +} diff --git a/src/librustc_incremental/persist/hash.rs b/src/librustc_incremental/persist/hash.rs index 5d01f88060282..95bee669d3256 100644 --- a/src/librustc_incremental/persist/hash.rs +++ b/src/librustc_incremental/persist/hash.rs @@ -15,6 +15,7 @@ use rustc::hir::def_id::DefId; use rustc::hir::svh::Svh; use rustc::ty::TyCtxt; use rustc_data_structures::fnv::FnvHashMap; +use rustc_data_structures::flock; use rustc_serialize::Decodable; use std::io::{ErrorKind, Read}; use std::fs::File; @@ -22,7 +23,7 @@ use syntax::ast; use IncrementalHashesMap; use super::data::*; -use super::util::*; +use super::fs::*; pub struct HashContext<'a, 'tcx: 'a> { pub tcx: TyCtxt<'a, 'tcx, 'tcx>, @@ -128,19 +129,43 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> { debug!("load_data: svh={}", svh); assert!(old.is_none(), "loaded data for crate {:?} twice", cnum); - if let Some(path) = metadata_hash_path(self.tcx, cnum) { - debug!("load_data: path={:?}", path); + if let Some(session_dir) = find_metadata_hashes_for(self.tcx, cnum) { + debug!("load_data: session_dir={:?}", session_dir); + + // Lock the directory we'll be reading the hashes from. 
+ let lock_file_path = lock_file_path(&session_dir); + let _lock = match flock::Lock::new(&lock_file_path, + false, // don't wait + false, // don't create the lock-file + false) { // shared lock + Ok(lock) => lock, + Err(err) => { + debug!("Could not acquire lock on `{}` while trying to \ + load metadata hashes: {}", + lock_file_path.display(), + err); + + // Could not acquire the lock. The directory is probably in + // in the process of being deleted. It's OK to just exit + // here. It's the same scenario as if the file had not + // existed in the first place. + return + } + }; + + let hashes_file_path = metadata_hash_import_path(&session_dir); + let mut data = vec![]; match - File::open(&path) - .and_then(|mut file| file.read_to_end(&mut data)) + File::open(&hashes_file_path) + .and_then(|mut file| file.read_to_end(&mut data)) { Ok(_) => { - match self.load_from_data(cnum, &data) { + match self.load_from_data(cnum, &data, svh) { Ok(()) => { } Err(err) => { bug!("decoding error in dep-graph from `{}`: {}", - path.display(), err); + &hashes_file_path.display(), err); } } } @@ -152,7 +177,7 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> { _ => { self.tcx.sess.err( &format!("could not load dep information from `{}`: {}", - path.display(), err)); + hashes_file_path.display(), err)); return; } } @@ -161,11 +186,22 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> { } } - fn load_from_data(&mut self, cnum: ast::CrateNum, data: &[u8]) -> Result<(), Error> { + fn load_from_data(&mut self, + cnum: ast::CrateNum, + data: &[u8], + expected_svh: Svh) -> Result<(), Error> { debug!("load_from_data(cnum={})", cnum); // Load up the hashes for the def-ids from this crate. let mut decoder = Decoder::new(data, 0); + let svh_in_hashes_file = try!(Svh::decode(&mut decoder)); + + if svh_in_hashes_file != expected_svh { + // We should not be able to get here. If we do, then + // `fs::find_metadata_hashes_for()` has messed up. + bug!("mismatch between SVH in crate and SVH in incr. comp. 
hashes") + } + let serialized_hashes = try!(SerializedMetadataHashes::decode(&mut decoder)); for serialized_hash in serialized_hashes.hashes { // the hashes are stored with just a def-index, which is diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs index 75448d199f73e..cc4966eadae91 100644 --- a/src/librustc_incremental/persist/load.rs +++ b/src/librustc_incremental/persist/load.rs @@ -27,7 +27,7 @@ use super::data::*; use super::directory::*; use super::dirty_clean; use super::hash::*; -use super::util::*; +use super::fs::*; pub type DirtyNodes = FnvHashSet>; @@ -45,19 +45,38 @@ pub fn load_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, return; } + match prepare_session_directory(tcx) { + Ok(true) => { + // We successfully allocated a session directory and there is + // something in it to load, so continue + } + Ok(false) => { + // We successfully allocated a session directory, but there is no + // dep-graph data in it to load (because this is the first + // compilation session with this incr. comp. dir.) + return + } + Err(()) => { + // Something went wrong while trying to allocate the session + // directory. Don't try to use it any further. 
+ let _ = garbage_collect_session_directories(tcx.sess); + return + } + } + let _ignore = tcx.dep_graph.in_ignore(); load_dep_graph_if_exists(tcx, incremental_hashes_map); } fn load_dep_graph_if_exists<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, incremental_hashes_map: &IncrementalHashesMap) { - let dep_graph_path = dep_graph_path(tcx).unwrap(); + let dep_graph_path = dep_graph_path(tcx.sess); let dep_graph_data = match load_data(tcx.sess, &dep_graph_path) { Some(p) => p, None => return // no file }; - let work_products_path = tcx_work_products_path(tcx).unwrap(); + let work_products_path = work_products_path(tcx.sess); let work_products_data = match load_data(tcx.sess, &work_products_path) { Some(p) => p, None => return // no file @@ -258,7 +277,7 @@ fn reconcile_work_products<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, .saved_files .iter() .all(|&(_, ref file_name)| { - let path = in_incr_comp_dir(tcx.sess, &file_name).unwrap(); + let path = in_incr_comp_dir_sess(tcx.sess, &file_name); path.exists() }); if all_files_exist { @@ -276,7 +295,7 @@ fn delete_dirty_work_product(tcx: TyCtxt, swp: SerializedWorkProduct) { debug!("delete_dirty_work_product({:?})", swp); for &(_, ref file_name) in &swp.work_product.saved_files { - let path = in_incr_comp_dir(tcx.sess, file_name).unwrap(); + let path = in_incr_comp_dir_sess(tcx.sess, file_name); match fs::remove_file(&path) { Ok(()) => { } Err(err) => { diff --git a/src/librustc_incremental/persist/mod.rs b/src/librustc_incremental/persist/mod.rs index 4a042497e0441..ba0f71971bb45 100644 --- a/src/librustc_incremental/persist/mod.rs +++ b/src/librustc_incremental/persist/mod.rs @@ -15,15 +15,16 @@ mod data; mod directory; mod dirty_clean; +mod fs; mod hash; mod load; mod preds; mod save; -mod util; mod work_product; +pub use self::fs::finalize_session_directory; +pub use self::fs::in_incr_comp_dir; pub use self::load::load_dep_graph; pub use self::save::save_dep_graph; pub use self::save::save_work_products; pub use 
self::work_product::save_trans_partition; -pub use self::util::in_incr_comp_dir; diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs index 74ee876d0bbc5..d31252be5e857 100644 --- a/src/librustc_incremental/persist/save.rs +++ b/src/librustc_incremental/persist/save.rs @@ -11,7 +11,7 @@ use rbml::opaque::Encoder; use rustc::dep_graph::DepNode; use rustc::hir::def_id::DefId; -use rustc::middle::cstore::LOCAL_CRATE; +use rustc::hir::svh::Svh; use rustc::session::Session; use rustc::ty::TyCtxt; use rustc_data_structures::fnv::FnvHashMap; @@ -26,10 +26,11 @@ use super::data::*; use super::directory::*; use super::hash::*; use super::preds::*; -use super::util::*; +use super::fs::*; pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, - incremental_hashes_map: &IncrementalHashesMap) { + incremental_hashes_map: &IncrementalHashesMap, + svh: Svh) { debug!("save_dep_graph()"); let _ignore = tcx.dep_graph.in_ignore(); let sess = tcx.sess; @@ -41,31 +42,31 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let query = tcx.dep_graph.query(); let preds = Predecessors::new(&query, &mut hcx); save_in(sess, - dep_graph_path(tcx), + dep_graph_path(sess), |e| encode_dep_graph(&preds, &mut builder, e)); save_in(sess, - metadata_hash_path(tcx, LOCAL_CRATE), - |e| encode_metadata_hashes(tcx, &preds, &mut builder, e)); + metadata_hash_export_path(sess), + |e| encode_metadata_hashes(tcx, svh, &preds, &mut builder, e)); } -pub fn save_work_products(sess: &Session, local_crate_name: &str) { +pub fn save_work_products(sess: &Session) { + if sess.opts.incremental.is_none() { + return; + } + debug!("save_work_products()"); let _ignore = sess.dep_graph.in_ignore(); - let path = sess_work_products_path(sess, local_crate_name); + let path = work_products_path(sess); save_in(sess, path, |e| encode_work_products(sess, e)); } -fn save_in(sess: &Session, opt_path_buf: Option, encode: F) +fn save_in(sess: &Session, path_buf: PathBuf, 
encode: F) where F: FnOnce(&mut Encoder) -> io::Result<()> { - let path_buf = match opt_path_buf { - Some(p) => p, - None => return, - }; - - // FIXME(#32754) lock file? - // delete the old dep-graph, if any + // Note: It's important that we actually delete the old file and not just + // truncate and overwrite it, since it might be a shared hard-link, the + // underlying data of which we don't want to modify if path_buf.exists() { match fs::remove_file(&path_buf) { Ok(()) => {} @@ -155,6 +156,7 @@ pub fn encode_dep_graph(preds: &Predecessors, } pub fn encode_metadata_hashes(tcx: TyCtxt, + svh: Svh, preds: &Predecessors, builder: &mut DefIdDirectoryBuilder, encoder: &mut Encoder) @@ -220,6 +222,7 @@ pub fn encode_metadata_hashes(tcx: TyCtxt, } // Encode everything. + try!(svh.encode(encoder)); try!(serialized_hashes.encode(encoder)); Ok(()) diff --git a/src/librustc_incremental/persist/util.rs b/src/librustc_incremental/persist/util.rs deleted file mode 100644 index f1e81fdb266b9..0000000000000 --- a/src/librustc_incremental/persist/util.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use rustc::middle::cstore::LOCAL_CRATE; -use rustc::session::Session; -use rustc::ty::TyCtxt; - -use std::fs; -use std::io; -use std::path::{Path, PathBuf}; -use syntax::ast; - -pub fn dep_graph_path(tcx: TyCtxt) -> Option { - tcx_path(tcx, LOCAL_CRATE, "local") -} - -pub fn metadata_hash_path(tcx: TyCtxt, cnum: ast::CrateNum) -> Option { - tcx_path(tcx, cnum, "metadata") -} - -pub fn tcx_work_products_path(tcx: TyCtxt) -> Option { - let crate_name = tcx.crate_name(LOCAL_CRATE); - sess_work_products_path(tcx.sess, &crate_name) -} - -pub fn sess_work_products_path(sess: &Session, - local_crate_name: &str) - -> Option { - let crate_disambiguator = sess.local_crate_disambiguator(); - path(sess, local_crate_name, &crate_disambiguator, "work-products") -} - -pub fn in_incr_comp_dir(sess: &Session, file_name: &str) -> Option { - sess.opts.incremental.as_ref().map(|incr_dir| incr_dir.join(file_name)) -} - -fn tcx_path(tcx: TyCtxt, - cnum: ast::CrateNum, - middle: &str) - -> Option { - path(tcx.sess, &tcx.crate_name(cnum), &tcx.crate_disambiguator(cnum), middle) -} - -fn path(sess: &Session, - crate_name: &str, - crate_disambiguator: &str, - middle: &str) - -> Option { - // For now, just save/load dep-graph from - // directory/dep_graph.rbml - sess.opts.incremental.as_ref().and_then(|incr_dir| { - match create_dir_racy(&incr_dir) { - Ok(()) => {} - Err(err) => { - sess.err( - &format!("could not create the directory `{}`: {}", - incr_dir.display(), err)); - return None; - } - } - - let file_name = format!("{}-{}.{}.bin", crate_name, crate_disambiguator, middle); - - Some(incr_dir.join(file_name)) - }) -} - -// Like std::fs::create_dir_all, except handles concurrent calls among multiple -// threads or processes. 
-fn create_dir_racy(path: &Path) -> io::Result<()> { - match fs::create_dir(path) { - Ok(()) => return Ok(()), - Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => return Ok(()), - Err(ref e) if e.kind() == io::ErrorKind::NotFound => {} - Err(e) => return Err(e), - } - match path.parent() { - Some(p) => try!(create_dir_racy(p)), - None => return Err(io::Error::new(io::ErrorKind::Other, - "failed to create whole tree")), - } - match fs::create_dir(path) { - Ok(()) => Ok(()), - Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => Ok(()), - Err(e) => Err(e), - } -} - diff --git a/src/librustc_incremental/persist/work_product.rs b/src/librustc_incremental/persist/work_product.rs index c106ea8f26269..a9ebd27ce9928 100644 --- a/src/librustc_incremental/persist/work_product.rs +++ b/src/librustc_incremental/persist/work_product.rs @@ -10,7 +10,7 @@ //! This module contains files for saving intermediate work-products. -use persist::util::*; +use persist::fs::*; use rustc::dep_graph::{WorkProduct, WorkProductId}; use rustc::session::Session; use rustc::session::config::OutputType; @@ -35,7 +35,7 @@ pub fn save_trans_partition(sess: &Session, files.iter() .map(|&(kind, ref path)| { let file_name = format!("cgu-{}.{}", cgu_name, kind.extension()); - let path_in_incr_dir = in_incr_comp_dir(sess, &file_name).unwrap(); + let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name); match link_or_copy(path, &path_in_incr_dir) { Ok(_) => Some((kind, file_name)), Err(err) => { diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index 4b9c29d3d7db3..081b4431bd7b8 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -10,7 +10,7 @@ use back::lto; use back::link::{get_linker, remove}; -use rustc_incremental::save_trans_partition; +use rustc_incremental::{save_trans_partition, in_incr_comp_dir}; use session::config::{OutputFilenames, OutputTypes, Passes, SomePasses, AllPasses}; use session::Session; use 
session::config::{self, OutputType}; @@ -328,8 +328,9 @@ struct CodegenContext<'a> { remark: Passes, // Worker thread number worker: usize, - // Directory where incremental data is stored (if any) - incremental: Option, + // The incremental compilation session directory, or None if we are not + // compiling incrementally + incr_comp_session_dir: Option } impl<'a> CodegenContext<'a> { @@ -340,7 +341,7 @@ impl<'a> CodegenContext<'a> { plugin_passes: sess.plugin_llvm_passes.borrow().clone(), remark: sess.opts.cg.remark.clone(), worker: 0, - incremental: sess.opts.incremental.clone(), + incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()) } } } @@ -962,17 +963,20 @@ fn execute_work_item(cgcx: &CodegenContext, work_item.output_names); } ModuleSource::Preexisting(wp) => { - let incremental = cgcx.incremental.as_ref().unwrap(); + let incr_comp_session_dir = cgcx.incr_comp_session_dir + .as_ref() + .unwrap(); let name = &work_item.mtrans.name; for (kind, saved_file) in wp.saved_files { let obj_out = work_item.output_names.temp_path(kind, Some(name)); - let source_file = incremental.join(&saved_file); + let source_file = in_incr_comp_dir(&incr_comp_session_dir, + &saved_file); debug!("copying pre-existing module `{}` from {:?} to {}", work_item.mtrans.name, source_file, obj_out.display()); match link_or_copy(&source_file, &obj_out) { - Ok(()) => { } + Ok(_) => { } Err(err) => { cgcx.handler.err(&format!("unable to copy {} to {}: {}", source_file.display(), @@ -1018,7 +1022,7 @@ fn run_work_multithreaded(sess: &Session, let mut tx = Some(tx); futures.push(rx); - let incremental = sess.opts.incremental.clone(); + let incr_comp_session_dir = sess.incr_comp_session_dir_opt().map(|r| r.clone()); thread::Builder::new().name(format!("codegen-{}", i)).spawn(move || { let diag_handler = Handler::with_emitter(true, false, box diag_emitter); @@ -1031,7 +1035,7 @@ fn run_work_multithreaded(sess: &Session, plugin_passes: plugin_passes, remark: remark, worker: i, - 
incremental: incremental, + incr_comp_session_dir: incr_comp_session_dir }; loop { From 794fd315adb9bdfaacb28fa5571b3a63b954b010 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Mon, 22 Aug 2016 13:01:46 -0400 Subject: [PATCH 04/11] incr.comp.: Move lock files out of directory being locked --- src/librustc/session/mod.rs | 24 +- src/librustc_data_structures/flock.rs | 53 +-- src/librustc_data_structures/lib.rs | 2 +- src/librustc_incremental/persist/fs.rs | 409 ++++++++++++++--------- src/librustc_incremental/persist/load.rs | 1 - 5 files changed, 302 insertions(+), 187 deletions(-) diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index 02ee4571ab5d9..338c656379959 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -366,25 +366,31 @@ impl Session { pub fn mark_incr_comp_session_as_invalid(&self) { let mut incr_comp_session = self.incr_comp_session.borrow_mut(); - if let IncrCompSession::Active { .. } = *incr_comp_session { } else { - bug!("Trying to invalidate IncrCompSession `{:?}`", *incr_comp_session) - } + let session_directory = match *incr_comp_session { + IncrCompSession::Active { ref session_directory, .. 
} => { + session_directory.clone() + } + _ => bug!("Trying to invalidate IncrCompSession `{:?}`", + *incr_comp_session), + }; // Note: This will also drop the lock file, thus unlocking the directory - *incr_comp_session = IncrCompSession::InvalidBecauseOfErrors; + *incr_comp_session = IncrCompSession::InvalidBecauseOfErrors { + session_directory: session_directory + }; } pub fn incr_comp_session_dir(&self) -> cell::Ref { let incr_comp_session = self.incr_comp_session.borrow(); cell::Ref::map(incr_comp_session, |incr_comp_session| { match *incr_comp_session { - IncrCompSession::NotInitialized | - IncrCompSession::InvalidBecauseOfErrors => { + IncrCompSession::NotInitialized => { bug!("Trying to get session directory from IncrCompSession `{:?}`", *incr_comp_session) } IncrCompSession::Active { ref session_directory, .. } | - IncrCompSession::Finalized { ref session_directory } => { + IncrCompSession::Finalized { ref session_directory } | + IncrCompSession::InvalidBecauseOfErrors { ref session_directory } => { session_directory } } @@ -541,7 +547,9 @@ pub enum IncrCompSession { // This is an error state that is reached when some compilation error has // occurred. It indicates that the contents of the session directory must // not be used, since they might be invalid. 
- InvalidBecauseOfErrors, + InvalidBecauseOfErrors { + session_directory: PathBuf, + } } fn init_llvm(sess: &Session) { diff --git a/src/librustc_data_structures/flock.rs b/src/librustc_data_structures/flock.rs index 22f8d76399519..4a184d3174dff 100644 --- a/src/librustc_data_structures/flock.rs +++ b/src/librustc_data_structures/flock.rs @@ -220,19 +220,18 @@ mod imp { use std::path::Path; use std::fs::{File, OpenOptions}; use std::os::raw::{c_ulong, c_ulonglong, c_int}; - use std::os::windows::fs::OpenOptionsExt; - pub type DWORD = c_ulong; - pub type BOOL = c_int; - pub type ULONG_PTR = c_ulonglong; + type DWORD = c_ulong; + type BOOL = c_int; + type ULONG_PTR = c_ulonglong; type LPOVERLAPPED = *mut OVERLAPPED; const LOCKFILE_EXCLUSIVE_LOCK: DWORD = 0x00000002; const LOCKFILE_FAIL_IMMEDIATELY: DWORD = 0x00000001; - pub const FILE_SHARE_DELETE: DWORD = 0x4; - pub const FILE_SHARE_READ: DWORD = 0x1; - pub const FILE_SHARE_WRITE: DWORD = 0x2; + const FILE_SHARE_DELETE: DWORD = 0x4; + const FILE_SHARE_READ: DWORD = 0x1; + const FILE_SHARE_WRITE: DWORD = 0x2; #[repr(C)] struct OVERLAPPED { @@ -263,19 +262,30 @@ mod imp { create: bool, exclusive: bool) -> io::Result { + assert!(p.parent().unwrap().exists(), + "Parent directory of lock-file must exist: {}", + p.display()); let share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE; - let f = { - let mut open_options = OpenOptions::new().read(true) - .share_mode(share_mode); - if create { - open_options.create(true); - } + let mut open_options = OpenOptions::new(); + open_options.read(true) + .share_mode(share_mode); + + if create { + open_options.create(true) + .write(true); + } - match open_options.open(p) { - Ok(file) => file, - Err(err) => return Err(err), + debug!("Attempting to open lock file `{}`", p.display()); + let file = match open_options.open(p) { + Ok(file) => { + debug!("Lock file opened successfully"); + file + } + Err(err) => { + debug!("Error opening lock file: {}", err); + return Err(err) 
} }; @@ -291,7 +301,9 @@ mod imp { dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; } - LockFileEx(f.as_raw_handle(), + debug!("Attempting to acquire lock on lock file `{}`", + p.display()); + LockFileEx(file.as_raw_handle(), dwFlags, 0, 0xFFFF_FFFF, @@ -299,9 +311,12 @@ mod imp { &mut overlapped) }; if ret == 0 { - Err(io::Error::last_os_error()) + let err = io::Error::last_os_error(); + debug!("Failed acquiring file lock: {}", err); + Err(err) } else { - Ok(Lock { _file: f }) + debug!("Successfully acquired lock."); + Ok(Lock { _file: file }) } } } diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs index 4391123559f9b..e7da18cef10f9 100644 --- a/src/librustc_data_structures/lib.rs +++ b/src/librustc_data_structures/lib.rs @@ -30,8 +30,8 @@ #![feature(staged_api)] #![feature(unboxed_closures)] #![feature(fn_traits)] -#![feature(libc)] +#![cfg_attr(unix, feature(libc))] #![cfg_attr(test, feature(test))] extern crate core; diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index c2990c66020b5..6eb3124e08eec 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -65,7 +65,7 @@ //! //! There is some synchronization needed in order for the compiler to be able to //! determine whether a given private session directory is not in used any more. -//! This is done by creating a lock file within each session directory and +//! This is done by creating a lock file for each session directory and //! locking it while the directory is still being used. Since file locks have //! operating system support, we can rely on the lock being released if the //! compiler process dies for some unexpected reason. 
Thus, when garbage @@ -131,7 +131,7 @@ use std::time::{UNIX_EPOCH, SystemTime, Duration}; use std::__rand::{thread_rng, Rng}; use syntax::ast; -const LOCK_FILE_NAME: &'static str = ".lock_file"; +const LOCK_FILE_EXT: &'static str = ".lock"; const DEP_GRAPH_FILENAME: &'static str = "dep-graph.bin"; const WORK_PRODUCTS_FILENAME: &'static str = "work-products.bin"; const METADATA_HASHES_FILENAME: &'static str = "metadata.bin"; @@ -153,7 +153,22 @@ pub fn metadata_hash_import_path(import_session_dir: &Path) -> PathBuf { } pub fn lock_file_path(session_dir: &Path) -> PathBuf { - session_dir.join(LOCK_FILE_NAME) + let crate_dir = session_dir.parent().unwrap(); + + let directory_name = session_dir.file_name().unwrap().to_string_lossy(); + assert_no_characters_lost(&directory_name); + + let dash_indices: Vec<_> = directory_name.match_indices("-") + .map(|(idx, _)| idx) + .collect(); + if dash_indices.len() != 3 { + bug!("Encountered incremental compilation session directory with \ + malformed name: {}", + session_dir.display()) + } + + crate_dir.join(&directory_name[0 .. 
dash_indices[2]]) + .with_extension(&LOCK_FILE_EXT[1..]) } pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf { @@ -179,23 +194,24 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { // {incr-comp-dir}/{crate-name-and-disambiguator} let crate_dir = crate_path_tcx(tcx, LOCAL_CRATE); debug!("crate-dir: {}", crate_dir.display()); + try!(create_dir(tcx.sess, &crate_dir, "crate")); let mut source_directories_already_tried = FnvHashSet(); loop { - // Allocate a session directory of the form: + // Generate a session directory of the form: // // {incr-comp-dir}/{crate-name-and-disambiguator}/sess-{timestamp}-{random}-working - // - // If this fails, return an error, don't retry - let session_dir = try!(alloc_session_dir(tcx.sess, &crate_dir)); + let session_dir = generate_session_dir_path(&crate_dir); debug!("session-dir: {}", session_dir.display()); - // Lock the newly created session directory. If this fails, return an + // Lock the new session directory. If this fails, return an // error without retrying - let directory_lock = try!(lock_directory(tcx.sess, &session_dir)); + let (directory_lock, lock_file_path) = try!(lock_directory(tcx.sess, &session_dir)); - let print_file_copy_stats = tcx.sess.opts.debugging_opts.incremental_info; + // Now that we have the lock, we can actually create the session + // directory + try!(create_dir(tcx.sess, &session_dir, "session")); // Find a suitable source directory to copy from. Ignore those that we // have already tried before. 
@@ -216,6 +232,8 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { debug!("attempting to copy data from source: {}", source_directory.display()); + let print_file_copy_stats = tcx.sess.opts.debugging_opts.incremental_info; + // Try copying over all files from the source directory if copy_files(&session_dir, &source_directory, print_file_copy_stats).is_ok() { debug!("successfully copied data from: {}", @@ -233,15 +251,19 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { // Try to remove the session directory we just allocated. We don't // know if there's any garbage in it from the failed copy action. if let Err(err) = std_fs::remove_dir_all(&session_dir) { - debug!("Failed to delete partly initialized session dir `{}`: {}", - session_dir.display(), - err); + tcx.sess.warn(&format!("Failed to delete partly initialized \ + session dir `{}`: {}", + session_dir.display(), + err)); } + + delete_session_dir_lock_file(tcx.sess, &lock_file_path); mem::drop(directory_lock); } } } + /// This function finalizes and thus 'publishes' the session directory by /// renaming it to `sess-{timestamp}-{svh}` and releasing the file lock. 
/// If there have been compilation errors, however, this function will just @@ -262,10 +284,13 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { if let Err(err) = std_fs::remove_dir_all(&*incr_comp_session_dir) { sess.warn(&format!("Error deleting incremental compilation \ - session directory `{}`: {}", + session directory `{}`: {}", incr_comp_session_dir.display(), err)); } + + let lock_file_path = lock_file_path(&*incr_comp_session_dir); + delete_session_dir_lock_file(sess, &lock_file_path); sess.mark_incr_comp_session_as_invalid(); } @@ -277,8 +302,8 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { .to_string_lossy(); assert_no_characters_lost(&old_sub_dir_name); - // Keep the 'sess-{timestamp}' prefix, but replace the - // '-{random-number}-working' part with the SVH of the crate + // Keep the 'sess-{timestamp}-{random-number}' prefix, but replace the + // '-working' part with the SVH of the crate let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-") .map(|(idx, _)| idx) .collect(); @@ -288,8 +313,8 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { incr_comp_session_dir.display()) } - // State: "sess-{timestamp}-" - let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[1] + 1]); + // State: "sess-{timestamp}-{random-number}-" + let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]); // Append the svh new_sub_dir_name.push_str(&svh.to_string()); @@ -327,7 +352,7 @@ fn copy_files(target_dir: &Path, -> Result<(), ()> { // We acquire a shared lock on the lock file of the directory, so that // nobody deletes it out from under us while we are reading from it. 
- let lock_file_path = source_dir.join(LOCK_FILE_NAME); + let lock_file_path = lock_file_path(source_dir); let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path, false, // don't wait, false, // don't create @@ -351,10 +376,6 @@ fn copy_files(target_dir: &Path, Ok(entry) => { let file_name = entry.file_name(); - if file_name.to_string_lossy() == LOCK_FILE_NAME { - continue; - } - let target_file_path = target_dir.join(file_name); let source_path = entry.path(); @@ -383,30 +404,32 @@ fn copy_files(target_dir: &Path, Ok(()) } -/// Create a directory with a path of the form: +/// Generate unique directory path of the form: /// {crate_dir}/sess-{timestamp}-{random-number}-working -fn alloc_session_dir(sess: &Session, - crate_dir: &Path) - -> Result { +fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { let timestamp = timestamp_to_string(SystemTime::now()); - debug!("alloc_session_dir: timestamp = {}", timestamp); + debug!("generate_session_dir_path: timestamp = {}", timestamp); let random_number = thread_rng().next_u32(); - debug!("alloc_session_dir: random_number = {}", random_number); + debug!("generate_session_dir_path: random_number = {}", random_number); let directory_name = format!("sess-{}-{:x}-working", timestamp, random_number); - debug!("alloc_session_dir: directory_name = {}", directory_name); + debug!("generate_session_dir_path: directory_name = {}", directory_name); let directory_path = crate_dir.join(directory_name); - debug!("alloc_session_dir: directory_path = {}", directory_path.display()); + debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); + directory_path +} - match fs_util::create_dir_racy(&directory_path) { +fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> { + match fs_util::create_dir_racy(path) { Ok(()) => { - debug!("alloc_session_dir: directory created successfully"); - Ok(directory_path) + debug!("{} directory created successfully", dir_tag); + Ok(()) } Err(err) => { 
- sess.err(&format!("incremental compilation: could not create \ - session directory `{}`: {}", - directory_path.display(), + sess.err(&format!("Could not create incremental compilation {} \ + directory `{}`: {}", + dir_tag, + path.display(), err)); Err(()) } @@ -416,15 +439,15 @@ fn alloc_session_dir(sess: &Session, /// Allocate a the lock-file and lock it. fn lock_directory(sess: &Session, session_dir: &Path) - -> Result { - let lock_file_path = session_dir.join(LOCK_FILE_NAME); + -> Result<(flock::Lock, PathBuf), ()> { + let lock_file_path = lock_file_path(session_dir); debug!("lock_directory() - lock_file: {}", lock_file_path.display()); match flock::Lock::new(&lock_file_path, false, // don't wait true, // create the lock file true) { // the lock should be exclusive - Ok(lock) => Ok(lock), + Ok(lock) => Ok((lock, lock_file_path)), Err(err) => { sess.err(&format!("incremental compilation: could not create \ session directory lock file: {}", err)); @@ -433,6 +456,16 @@ fn lock_directory(sess: &Session, } } +fn delete_session_dir_lock_file(sess: &Session, + lock_file_path: &Path) { + if let Err(err) = std_fs::remove_file(&lock_file_path) { + sess.warn(&format!("Error deleting lock file for incremental \ + compilation session directory `{}`: {}", + lock_file_path.display(), + err)); + } +} + /// Find the most recent published session directory that is not in the /// ignore-list. 
fn find_source_directory(crate_dir: &Path, @@ -453,23 +486,26 @@ fn find_source_directory_in_iter(iter: I, let mut best_candidate = (UNIX_EPOCH, None); for session_dir in iter { + debug!("find_source_directory_in_iter - inspecting `{}`", + session_dir.display()); + + let directory_name = session_dir.file_name().unwrap().to_string_lossy(); + assert_no_characters_lost(&directory_name); + if source_directories_already_tried.contains(&session_dir) || - !is_finalized(&session_dir.to_string_lossy()) { + !is_session_directory(&directory_name) || + !is_finalized(&directory_name) { + debug!("find_source_directory_in_iter - ignoring."); continue } - let timestamp = { - let directory_name = session_dir.file_name().unwrap().to_string_lossy(); - assert_no_characters_lost(&directory_name); - - extract_timestamp_from_session_dir(&directory_name) - .unwrap_or_else(|_| { - bug!("unexpected incr-comp session dir: {}", session_dir.display()) - }) - }; + let timestamp = extract_timestamp_from_session_dir(&directory_name) + .unwrap_or_else(|_| { + bug!("unexpected incr-comp session dir: {}", session_dir.display()) + }); if timestamp > best_candidate.0 { - best_candidate = (timestamp, Some(session_dir)); + best_candidate = (timestamp, Some(session_dir.clone())); } } @@ -481,7 +517,12 @@ fn is_finalized(directory_name: &str) -> bool { } fn is_session_directory(directory_name: &str) -> bool { - directory_name.starts_with("sess-") + directory_name.starts_with("sess-") && + !directory_name.ends_with(LOCK_FILE_EXT) +} + +fn is_session_directory_lock_file(file_name: &str) -> bool { + file_name.starts_with("sess-") && file_name.ends_with(LOCK_FILE_EXT) } fn extract_timestamp_from_session_dir(directory_name: &str) @@ -493,7 +534,7 @@ fn extract_timestamp_from_session_dir(directory_name: &str) let dash_indices: Vec<_> = directory_name.match_indices("-") .map(|(idx, _)| idx) .collect(); - if dash_indices.len() < 2 { + if dash_indices.len() != 3 { return Err(()) } @@ -502,22 +543,22 @@ fn 
extract_timestamp_from_session_dir(directory_name: &str) fn timestamp_to_string(timestamp: SystemTime) -> String { let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); - let nanos = duration.as_secs() * 1_000_000_000 + - (duration.subsec_nanos() as u64); - format!("{:x}", nanos) + let micros = duration.as_secs() * 1_000_000 + + (duration.subsec_nanos() as u64) / 1000; + format!("{:x}", micros) } fn string_to_timestamp(s: &str) -> Result { - let nanos_since_unix_epoch = u64::from_str_radix(s, 16); + let micros_since_unix_epoch = u64::from_str_radix(s, 16); - if nanos_since_unix_epoch.is_err() { + if micros_since_unix_epoch.is_err() { return Err(()) } - let nanos_since_unix_epoch = nanos_since_unix_epoch.unwrap(); + let micros_since_unix_epoch = micros_since_unix_epoch.unwrap(); - let duration = Duration::new(nanos_since_unix_epoch / 1_000_000_000, - (nanos_since_unix_epoch % 1_000_000_000) as u32); + let duration = Duration::new(micros_since_unix_epoch / 1_000_000, + 1000 * (micros_since_unix_epoch % 1_000_000) as u32); Ok(UNIX_EPOCH + duration) } @@ -607,6 +648,10 @@ fn assert_no_characters_lost(s: &str) { } } +fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool { + timestamp < SystemTime::now() - Duration::from_secs(10) +} + pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { debug!("garbage_collect_session_directories() - begin"); @@ -618,8 +663,10 @@ pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { debug!("garbage_collect_session_directories() - crate directory: {}", crate_directory.display()); - let mut deletion_candidates = vec![]; - let mut definitely_delete = vec![]; + // First do a pass over the crate directory, collecting lock files and + // session directories + let mut session_directories = FnvHashSet(); + let mut lock_files = FnvHashSet(); for dir_entry in try!(crate_directory.read_dir()) { let dir_entry = match dir_entry { @@ -630,102 +677,143 @@ pub fn 
garbage_collect_session_directories(sess: &Session) -> io::Result<()> { } }; - let directory_name = dir_entry.file_name(); - let directory_name = directory_name.to_string_lossy(); + let entry_name = dir_entry.file_name(); + let entry_name = entry_name.to_string_lossy(); - if !is_session_directory(&directory_name) { - // This is something we don't know, leave it alone... - continue + if is_session_directory_lock_file(&entry_name) { + assert_no_characters_lost(&entry_name); + lock_files.insert(entry_name.into_owned()); + } else if is_session_directory(&entry_name) { + assert_no_characters_lost(&entry_name); + session_directories.insert(entry_name.into_owned()); + } else { + // This is something we don't know, leave it alone } - assert_no_characters_lost(&directory_name); + } - if let Ok(file_type) = dir_entry.file_type() { - if !file_type.is_dir() { - // This is not a directory, skip it - continue + // Now map from lock files to session directories + let lock_file_to_session_dir: FnvHashMap> = + lock_files.into_iter() + .map(|lock_file_name| { + assert!(lock_file_name.ends_with(LOCK_FILE_EXT)); + let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len(); + let session_dir = { + let dir_prefix = &lock_file_name[0 .. dir_prefix_end]; + session_directories.iter() + .find(|dir_name| dir_name.starts_with(dir_prefix)) + }; + (lock_file_name, session_dir.map(String::clone)) + }) + .collect(); + + // Delete all lock files, that don't have an associated directory. 
They must + // be some kind of leftover + for (lock_file_name, directory_name) in &lock_file_to_session_dir { + if directory_name.is_none() { + let timestamp = match extract_timestamp_from_session_dir(lock_file_name) { + Ok(timestamp) => timestamp, + Err(()) => { + debug!("Found lock-file with malformed timestamp: {}", + crate_directory.join(&lock_file_name).display()); + // Ignore it + continue + } + }; + + let lock_file_path = crate_directory.join(&**lock_file_name); + + if is_old_enough_to_be_collected(timestamp) { + debug!("garbage_collect_session_directories() - deleting \ + garbage lock file: {}", lock_file_path.display()); + delete_session_dir_lock_file(sess, &lock_file_path); + } else { + debug!("garbage_collect_session_directories() - lock file with \ + no session dir not old enough to be collected: {}", + lock_file_path.display()); } - } else { - // Some error occurred while trying to determine the file type, - // skip it - continue } + } + + // Filter out `None` directories + let lock_file_to_session_dir: FnvHashMap = + lock_file_to_session_dir.into_iter() + .filter_map(|(lock_file_name, directory_name)| { + directory_name.map(|n| (lock_file_name, n)) + }) + .collect(); + + let mut deletion_candidates = vec![]; + let mut definitely_delete = vec![]; + for (lock_file_name, directory_name) in &lock_file_to_session_dir { debug!("garbage_collect_session_directories() - inspecting: {}", directory_name); - match extract_timestamp_from_session_dir(&directory_name) { - Ok(timestamp) => { - let lock_file_path = crate_directory.join(&*directory_name) - .join(LOCK_FILE_NAME); - - if !is_finalized(&directory_name) { - let ten_seconds = Duration::from_secs(10); - - // When cleaning out "-working" session directories, i.e. - // session directories that might still be in use by another - // compiler instance, we only look a directories that are - // at least ten seconds old. 
This is supposed to reduce the - // chance of deleting a directory in the time window where - // the process has allocated the directory but has not yet - // acquired the file-lock on it. - if timestamp < SystemTime::now() - ten_seconds { - debug!("garbage_collect_session_directories() - \ - attempting to collect"); - - // Try to acquire the directory lock. If we can't, it - // means that the owning process is still alive and we - // leave this directory alone. - match flock::Lock::new(&lock_file_path, - false, // don't wait - false, // don't create the lock-file - true) { // get an exclusive lock - Ok(lock) => { - debug!("garbage_collect_session_directories() - \ - successfully acquired lock"); - - // Note that we are holding on to the lock - definitely_delete.push((dir_entry.path(), - Some(lock))); - } - Err(_) => { - debug!("garbage_collect_session_directories() - \ - not collecting, still in use"); - } - } - } else { - debug!("garbage_collect_session_directories() - \ - private session directory too new"); - } - } else { - match flock::Lock::new(&lock_file_path, - false, // don't wait - false, // don't create the lock-file - true) { // get an exclusive lock - Ok(lock) => { - debug!("garbage_collect_session_directories() - \ - successfully acquired lock"); - debug!("garbage_collect_session_directories() - adding \ - deletion candidate: {}", directory_name); - - // Note that we are holding on to the lock - deletion_candidates.push((timestamp, - dir_entry.path(), - Some(lock))); - } - Err(_) => { - debug!("garbage_collect_session_directories() - \ + let timestamp = match extract_timestamp_from_session_dir(directory_name) { + Ok(timestamp) => timestamp, + Err(()) => { + debug!("Found session-dir with malformed timestamp: {}", + crate_directory.join(directory_name).display()); + // Ignore it + continue + } + }; + + if is_finalized(directory_name) { + let lock_file_path = crate_directory.join(lock_file_name); + match flock::Lock::new(&lock_file_path, + false, // 
don't wait + false, // don't create the lock-file + true) { // get an exclusive lock + Ok(lock) => { + debug!("garbage_collect_session_directories() - \ + successfully acquired lock"); + debug!("garbage_collect_session_directories() - adding \ + deletion candidate: {}", directory_name); + + // Note that we are holding on to the lock + deletion_candidates.push((timestamp, + crate_directory.join(directory_name), + Some(lock))); + } + Err(_) => { + debug!("garbage_collect_session_directories() - \ not collecting, still in use"); - } - } } } - Err(_) => { - // Malformed timestamp in directory, delete it - definitely_delete.push((dir_entry.path(), None)); - - debug!("garbage_collect_session_directories() - encountered \ - malformed session directory: {}", directory_name); + } else if is_old_enough_to_be_collected(timestamp) { + // When cleaning out "-working" session directories, i.e. + // session directories that might still be in use by another + // compiler instance, we only look a directories that are + // at least ten seconds old. This is supposed to reduce the + // chance of deleting a directory in the time window where + // the process has allocated the directory but has not yet + // acquired the file-lock on it. + + // Try to acquire the directory lock. If we can't, it + // means that the owning process is still alive and we + // leave this directory alone. 
+ let lock_file_path = crate_directory.join(lock_file_name); + match flock::Lock::new(&lock_file_path, + false, // don't wait + false, // don't create the lock-file + true) { // get an exclusive lock + Ok(lock) => { + debug!("garbage_collect_session_directories() - \ + successfully acquired lock"); + + // Note that we are holding on to the lock + definitely_delete.push((crate_directory.join(directory_name), + Some(lock))); + } + Err(_) => { + debug!("garbage_collect_session_directories() - \ + not collecting, still in use"); + } } + } else { + debug!("garbage_collect_session_directories() - not finalized, not \ + old enough"); } } @@ -739,8 +827,11 @@ pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { compilation session directory `{}`: {}", path.display(), err)); + } else { + delete_session_dir_lock_file(sess, &lock_file_path(&path)); } + // Let's make it explicit that the file lock is released at this point, // or rather, that we held on to it until here mem::drop(lock); @@ -755,6 +846,8 @@ pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { compilation session directory `{}`: {}", path.display(), err)); + } else { + delete_session_dir_lock_file(sess, &lock_file_path(&path)); } // Let's make it explicit that the file lock is released at this point, @@ -809,7 +902,7 @@ fn test_all_except_most_recent() { #[test] fn test_timestamp_serialization() { for i in 0 .. 
1_000u64 { - let time = UNIX_EPOCH + Duration::new(i * 3_434_578, (i as u32) * 239_676); + let time = UNIX_EPOCH + Duration::new(i * 3_434_578, (i as u32) * 239_000); let s = timestamp_to_string(time); assert_eq!(time, string_to_timestamp(&s).unwrap()); } @@ -821,17 +914,17 @@ fn test_find_source_directory_in_iter() { // Find newest assert_eq!(find_source_directory_in_iter( - vec![PathBuf::from("./sess-3234-0000"), - PathBuf::from("./sess-2234-0000"), - PathBuf::from("./sess-1234-0000")].into_iter(), &already_visited), - Some(PathBuf::from("./sess-3234-0000"))); + vec![PathBuf::from("crate-dir/sess-3234-0000-svh"), + PathBuf::from("crate-dir/sess-2234-0000-svh"), + PathBuf::from("crate-dir/sess-1234-0000-svh")].into_iter(), &already_visited), + Some(PathBuf::from("crate-dir/sess-3234-0000-svh"))); // Filter out "-working" assert_eq!(find_source_directory_in_iter( - vec![PathBuf::from("./sess-3234-0000-working"), - PathBuf::from("./sess-2234-0000"), - PathBuf::from("./sess-1234-0000")].into_iter(), &already_visited), - Some(PathBuf::from("./sess-2234-0000"))); + vec![PathBuf::from("crate-dir/sess-3234-0000-working"), + PathBuf::from("crate-dir/sess-2234-0000-svh"), + PathBuf::from("crate-dir/sess-1234-0000-svh")].into_iter(), &already_visited), + Some(PathBuf::from("crate-dir/sess-2234-0000-svh"))); // Handle empty assert_eq!(find_source_directory_in_iter(vec![].into_iter(), &already_visited), @@ -839,9 +932,9 @@ fn test_find_source_directory_in_iter() { // Handle only working assert_eq!(find_source_directory_in_iter( - vec![PathBuf::from("./sess-3234-0000-working"), - PathBuf::from("./sess-2234-0000-working"), - PathBuf::from("./sess-1234-0000-working")].into_iter(), &already_visited), + vec![PathBuf::from("crate-dir/sess-3234-0000-working"), + PathBuf::from("crate-dir/sess-2234-0000-working"), + PathBuf::from("crate-dir/sess-1234-0000-working")].into_iter(), &already_visited), None); } diff --git a/src/librustc_incremental/persist/load.rs 
b/src/librustc_incremental/persist/load.rs index cc4966eadae91..48f95430f26b6 100644 --- a/src/librustc_incremental/persist/load.rs +++ b/src/librustc_incremental/persist/load.rs @@ -59,7 +59,6 @@ pub fn load_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, Err(()) => { // Something went wrong while trying to allocate the session // directory. Don't try to use it any further. - let _ = garbage_collect_session_directories(tcx.sess); return } } From 004a7eb127eca62908b6c4ff7d24a9685f21ae85 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Thu, 25 Aug 2016 20:48:13 -0400 Subject: [PATCH 05/11] Fix rustbuild --- src/librustdoc/Cargo.toml | 1 + src/rustc/Cargo.lock | 1 + 2 files changed, 2 insertions(+) diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 3e510bdc9002e..d66d2001f2304 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -16,6 +16,7 @@ rustc_back = { path = "../librustc_back" } rustc_const_eval = { path = "../librustc_const_eval" } rustc_const_math = { path = "../librustc_const_math" } rustc_driver = { path = "../librustc_driver" } +rustc_data_structures = { path = "../librustc_data_structures" } rustc_errors = { path = "../librustc_errors" } rustc_lint = { path = "../librustc_lint" } rustc_metadata = { path = "../librustc_metadata" } diff --git a/src/rustc/Cargo.lock b/src/rustc/Cargo.lock index c0db651d7d2b4..fde2f83e220f9 100644 --- a/src/rustc/Cargo.lock +++ b/src/rustc/Cargo.lock @@ -363,6 +363,7 @@ dependencies = [ "rustc_back 0.0.0", "rustc_const_eval 0.0.0", "rustc_const_math 0.0.0", + "rustc_data_structures 0.0.0", "rustc_driver 0.0.0", "rustc_errors 0.0.0", "rustc_lint 0.0.0", From b67f57afdd80e75c93c4ae49e12b8bb16d209916 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Fri, 26 Aug 2016 12:26:20 -0400 Subject: [PATCH 06/11] incr. comp.: Fix test_timestamp_serialization so it does not overflow on some 32bit systems. 
--- src/librustc_incremental/persist/fs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index 6eb3124e08eec..9e4a16fd43c0d 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -902,9 +902,9 @@ fn test_all_except_most_recent() { #[test] fn test_timestamp_serialization() { for i in 0 .. 1_000u64 { - let time = UNIX_EPOCH + Duration::new(i * 3_434_578, (i as u32) * 239_000); + let time = UNIX_EPOCH + Duration::new(i * 1_434_578, (i as u32) * 239_000); let s = timestamp_to_string(time); - assert_eq!(time, string_to_timestamp(&s).unwrap()); + assert_eq!(Ok(time), string_to_timestamp(&s)); } } From a3dc5f95aa06b26a056f67cdc5e8438e80e8394c Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Fri, 26 Aug 2016 16:50:24 -0400 Subject: [PATCH 07/11] incr.comp.: Make path's of session directories slightly shorter. By using "s-" instead of "sess-" as a prefix and encoding numbers as base36 instead of base16. --- src/librustc_incremental/persist/fs.rs | 113 ++++++++++++++++--------- 1 file changed, 75 insertions(+), 38 deletions(-) diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index 9e4a16fd43c0d..809e1324c1efe 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -45,7 +45,7 @@ //! that are consistent with the state of the source code it was compiled //! from, with no need to change them ever again. At this point, the compiler //! finalizes and "publishes" its private session directory by renaming it -//! from "sess-{timestamp}-{random}-working" to "sess-{timestamp}-{SVH}". +//! from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}". //! 6. At this point the "old" session directory that we copied our data from //! at the beginning of the session has become obsolete because we have just //! published a more current version. 
Thus the compiler will delete it. @@ -201,7 +201,7 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { loop { // Generate a session directory of the form: // - // {incr-comp-dir}/{crate-name-and-disambiguator}/sess-{timestamp}-{random}-working + // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working let session_dir = generate_session_dir_path(&crate_dir); debug!("session-dir: {}", session_dir.display()); @@ -265,7 +265,7 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { /// This function finalizes and thus 'publishes' the session directory by -/// renaming it to `sess-{timestamp}-{svh}` and releasing the file lock. +/// renaming it to `s-{timestamp}-{svh}` and releasing the file lock. /// If there have been compilation errors, however, this function will just /// delete the presumably invalid session directory. pub fn finalize_session_directory(sess: &Session, svh: Svh) { @@ -302,7 +302,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { .to_string_lossy(); assert_no_characters_lost(&old_sub_dir_name); - // Keep the 'sess-{timestamp}-{random-number}' prefix, but replace the + // Keep the 's-{timestamp}-{random-number}' prefix, but replace the // '-working' part with the SVH of the crate let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-") .map(|(idx, _)| idx) @@ -313,11 +313,11 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { incr_comp_session_dir.display()) } - // State: "sess-{timestamp}-{random-number}-" + // State: "s-{timestamp}-{random-number}-" let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. 
dash_indices[2] + 1]); // Append the svh - new_sub_dir_name.push_str(&svh.to_string()); + new_sub_dir_name.push_str(&encode_base_36(svh.as_u64())); // Create the full path let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); @@ -405,14 +405,16 @@ fn copy_files(target_dir: &Path, } /// Generate unique directory path of the form: -/// {crate_dir}/sess-{timestamp}-{random-number}-working +/// {crate_dir}/s-{timestamp}-{random-number}-working fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { let timestamp = timestamp_to_string(SystemTime::now()); debug!("generate_session_dir_path: timestamp = {}", timestamp); let random_number = thread_rng().next_u32(); debug!("generate_session_dir_path: random_number = {}", random_number); - let directory_name = format!("sess-{}-{:x}-working", timestamp, random_number); + let directory_name = format!("s-{}-{}-working", + timestamp, + encode_base_36(random_number as u64)); debug!("generate_session_dir_path: directory_name = {}", directory_name); let directory_path = crate_dir.join(directory_name); debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); @@ -517,12 +519,12 @@ fn is_finalized(directory_name: &str) -> bool { } fn is_session_directory(directory_name: &str) -> bool { - directory_name.starts_with("sess-") && + directory_name.starts_with("s-") && !directory_name.ends_with(LOCK_FILE_EXT) } fn is_session_directory_lock_file(file_name: &str) -> bool { - file_name.starts_with("sess-") && file_name.ends_with(LOCK_FILE_EXT) + file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT) } fn extract_timestamp_from_session_dir(directory_name: &str) @@ -541,15 +543,31 @@ fn extract_timestamp_from_session_dir(directory_name: &str) string_to_timestamp(&directory_name[dash_indices[0]+1 .. 
dash_indices[1]]) } +const BASE_36: &'static [u8] = b"0123456789abcdefghijklmnopqrstuvwxyz"; + +fn encode_base_36(mut n: u64) -> String { + let mut s = Vec::with_capacity(13); + loop { + s.push(BASE_36[(n % 36) as usize]); + n /= 36; + + if n == 0 { + break; + } + } + s.reverse(); + String::from_utf8(s).unwrap() +} + fn timestamp_to_string(timestamp: SystemTime) -> String { let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000; - format!("{:x}", micros) + encode_base_36(micros) } fn string_to_timestamp(s: &str) -> Result { - let micros_since_unix_epoch = u64::from_str_radix(s, 16); + let micros_since_unix_epoch = u64::from_str_radix(s, 36); if micros_since_unix_epoch.is_err() { return Err(()) @@ -591,7 +609,8 @@ pub fn find_metadata_hashes_for(tcx: TyCtxt, cnum: ast::CrateNum) -> Option Date: Fri, 26 Aug 2016 16:53:19 -0400 Subject: [PATCH 08/11] incr.comp.: Make compiletest generate shorter cache directory names. --- src/tools/compiletest/src/runtest.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index 60a0d8f0b865f..aa4510746fdc3 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -2041,7 +2041,7 @@ actual:\n\ /// Directory where incremental work products are stored. 
fn incremental_dir(&self) -> PathBuf { - self.output_base_name().with_extension("incremental") + self.output_base_name().with_extension("inc") } fn run_rmake_test(&self) { From 68d2275a97f62c2b3399b49453876dd56e82f0eb Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Sat, 27 Aug 2016 14:37:40 -0400 Subject: [PATCH 09/11] Fix tidy-errors --- src/librustc_incremental/persist/fs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index 809e1324c1efe..b0d71e47e409e 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -412,7 +412,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { let random_number = thread_rng().next_u32(); debug!("generate_session_dir_path: random_number = {}", random_number); - let directory_name = format!("s-{}-{}-working", + let directory_name = format!("s-{}-{}-working", timestamp, encode_base_36(random_number as u64)); debug!("generate_session_dir_path: directory_name = {}", directory_name); @@ -1005,7 +1005,7 @@ fn test_find_metadata_hashes_iter() None ); } - + #[test] fn test_encode_base_36() { fn test(n: u64) { From 50b008ae3b9d15765d829ba336856f7057c9bb0c Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Mon, 29 Aug 2016 13:38:09 -0400 Subject: [PATCH 10/11] compiletest: Canonicalize paths when removing incr.comp. dir, enabling longer paths --- src/tools/compiletest/src/runtest.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index aa4510746fdc3..228d6ada01dcc 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -1976,7 +1976,10 @@ actual:\n\ // runs.
let incremental_dir = self.incremental_dir(); if incremental_dir.exists() { - fs::remove_dir_all(&incremental_dir).unwrap(); + // Canonicalizing the path will convert it to the //?/ format + // on Windows, which enables paths longer than 260 characters + let canonicalized = incremental_dir.canonicalize().unwrap(); + fs::remove_dir_all(canonicalized).unwrap(); } fs::create_dir_all(&incremental_dir).unwrap(); From bcd2f905c46158f9137fa5b63aafebcb60083385 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Mon, 29 Aug 2016 14:18:26 -0400 Subject: [PATCH 11/11] incr.comp.: Canonicalize path to session directory before deleting it. --- src/librustc_incremental/persist/fs.rs | 34 ++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index b0d71e47e409e..4ad4b115759c4 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -250,7 +250,7 @@ pub fn prepare_session_directory(tcx: TyCtxt) -> Result { // Try to remove the session directory we just allocated. We don't // know if there's any garbage in it from the failed copy action.
- if let Err(err) = std_fs::remove_dir_all(&session_dir) { + if let Err(err) = safe_remove_dir_all(&session_dir) { tcx.sess.warn(&format!("Failed to delete partly initialized \ session dir `{}`: {}", session_dir.display(), @@ -282,7 +282,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { debug!("finalize_session_directory() - invalidating session directory: {}", incr_comp_session_dir.display()); - if let Err(err) = std_fs::remove_dir_all(&*incr_comp_session_dir) { + if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) { sess.warn(&format!("Error deleting incremental compilation \ session directory `{}`: {}", incr_comp_session_dir.display(), @@ -460,7 +460,7 @@ fn lock_directory(sess: &Session, fn delete_session_dir_lock_file(sess: &Session, lock_file_path: &Path) { - if let Err(err) = std_fs::remove_file(&lock_file_path) { + if let Err(err) = safe_remove_file(&lock_file_path) { sess.warn(&format!("Error deleting lock file for incremental \ compilation session directory `{}`: {}", lock_file_path.display(), @@ -841,7 +841,7 @@ pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { debug!("garbage_collect_session_directories() - deleting `{}`", path.display()); - if let Err(err) = std_fs::remove_dir_all(&path) { + if let Err(err) = safe_remove_dir_all(&path) { sess.warn(&format!("Failed to garbage collect finalized incremental \ compilation session directory `{}`: {}", path.display(), @@ -860,7 +860,7 @@ pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { debug!("garbage_collect_session_directories() - deleting `{}`", path.display()); - if let Err(err) = std_fs::remove_dir_all(&path) { + if let Err(err) = safe_remove_dir_all(&path) { sess.warn(&format!("Failed to garbage collect incremental \ compilation session directory `{}`: {}", path.display(), @@ -893,6 +893,30 @@ fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option< } } +/// Since paths of artifacts within 
session directories can get quite long, we +/// need to support deleting files with very long paths. The regular +/// WinApi functions only support paths up to 260 characters, however. In order +/// to circumvent this limitation, we canonicalize the path of the directory +/// before passing it to std::fs::remove_dir_all(). This will convert the path +/// into the '\\?\' format, which supports much longer paths. +fn safe_remove_dir_all(p: &Path) -> io::Result<()> { + if p.exists() { + let canonicalized = try!(p.canonicalize()); + std_fs::remove_dir_all(canonicalized) + } else { + Ok(()) + } +} + +fn safe_remove_file(p: &Path) -> io::Result<()> { + if p.exists() { + let canonicalized = try!(p.canonicalize()); + std_fs::remove_file(canonicalized) + } else { + Ok(()) + } +} + #[test] fn test_all_except_most_recent() { assert_eq!(all_except_most_recent(