From 5f0a6e33b75d8428e807e7384a254fbb3250e59e Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Oct 2022 10:18:38 -0400 Subject: [PATCH 1/6] Rename timeout consts and timeout parameter; bump leniency --- node/core/pvf/src/host.rs | 30 +++++++++--------- node/core/pvf/src/metrics.rs | 9 ++++-- node/core/pvf/src/prepare/pool.rs | 10 +++--- node/core/pvf/src/prepare/queue.rs | 48 ++++++++++++++--------------- node/core/pvf/src/prepare/worker.rs | 4 +-- 5 files changed, 53 insertions(+), 48 deletions(-) diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 6670ea48d4ec..811cee0e032e 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -38,15 +38,15 @@ use std::{ time::{Duration, SystemTime}, }; -/// The time period after which the precheck preparation worker is considered unresponsive and will -/// be killed. +/// For prechecking requests, the time period after which the preparation worker is considered +/// unresponsive and will be killed. // NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric. -pub const PRECHECK_COMPILATION_TIMEOUT: Duration = Duration::from_secs(60); +pub const PRECHECK_PREPARATION_TIMEOUT: Duration = Duration::from_secs(60); -/// The time period after which the execute preparation worker is considered unresponsive and will -/// be killed. +/// For execution and heads-up requests, the time period after which the preparation worker is +/// considered unresponsive and will be killed. // NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric. -pub const EXECUTE_COMPILATION_TIMEOUT: Duration = Duration::from_secs(180); +pub const LENIENT_PREPARATION_TIMEOUT: Duration = Duration::from_secs(360); /// An alias to not spell the type for the oneshot sender for the PVF execution result. pub(crate) type ResultSender = oneshot::Sender>; @@ -429,9 +429,10 @@ async fn handle_to_host( Ok(()) } -/// Handles PVF prechecking. 
+/// Handles PVF prechecking requests. /// -/// This tries to prepare the PVF by compiling the WASM blob within a given timeout ([`PRECHECK_COMPILATION_TIMEOUT`]). +/// This tries to prepare the PVF by compiling the WASM blob within a given timeout +/// ([`PRECHECK_PREPARATION_TIMEOUT`]). async fn handle_precheck_pvf( artifacts: &mut Artifacts, prepare_queue: &mut mpsc::Sender, @@ -459,7 +460,7 @@ async fn handle_precheck_pvf( prepare::ToQueue::Enqueue { priority: Priority::Normal, pvf, - compilation_timeout: PRECHECK_COMPILATION_TIMEOUT, + preparation_timeout: PRECHECK_PREPARATION_TIMEOUT, }, ) .await?; @@ -469,9 +470,10 @@ async fn handle_precheck_pvf( /// Handles PVF execution. /// -/// This will first try to prepare the PVF, if a prepared artifact does not already exist. If there is already a -/// preparation job, we coalesce the two preparation jobs. When preparing for execution, we use a more lenient timeout -/// ([`EXECUTE_COMPILATION_TIMEOUT`]) than when prechecking. +/// This will first try to prepare the PVF, if a prepared artifact does not already exist. If there +/// is already a preparation job, we coalesce the two preparation jobs. When preparing for +/// execution, we use a more lenient timeout ([`LENIENT_PREPARATION_TIMEOUT`]) than when +/// prechecking. 
async fn handle_execute_pvf( cache_path: &Path, artifacts: &mut Artifacts, @@ -518,7 +520,7 @@ async fn handle_execute_pvf( prepare::ToQueue::Enqueue { priority, pvf, - compilation_timeout: EXECUTE_COMPILATION_TIMEOUT, + preparation_timeout: LENIENT_PREPARATION_TIMEOUT, }, ) .await?; @@ -557,7 +559,7 @@ async fn handle_heads_up( prepare::ToQueue::Enqueue { priority: Priority::Normal, pvf: active_pvf, - compilation_timeout: EXECUTE_COMPILATION_TIMEOUT, + preparation_timeout: LENIENT_PREPARATION_TIMEOUT, }, ) .await?; diff --git a/node/core/pvf/src/metrics.rs b/node/core/pvf/src/metrics.rs index 547ee65f3e9d..20965ec7dbd7 100644 --- a/node/core/pvf/src/metrics.rs +++ b/node/core/pvf/src/metrics.rs @@ -155,8 +155,8 @@ impl metrics::Metrics for Metrics { "Time spent in preparing PVF artifacts in seconds", ) .buckets(vec![ - // This is synchronized with the PRECHECK_COMPILATION_TIMEOUT=60s - // and EXECUTE_COMPILATION_TIMEOUT=180s constants found in + // This is synchronized with the PRECHECK_PREPARATION_TIMEOUT=60s + // and LENIENT_PREPARATION_TIMEOUT=360s constants found in // src/prepare/worker.rs 0.1, 0.5, @@ -167,7 +167,10 @@ impl metrics::Metrics for Metrics { 20.0, 30.0, 60.0, - 180.0, + 120.0, + 240.0, + 360.0, + 480.0, ]), )?, registry, diff --git a/node/core/pvf/src/prepare/pool.rs b/node/core/pvf/src/prepare/pool.rs index fad6ed167614..9ba64be97555 100644 --- a/node/core/pvf/src/prepare/pool.rs +++ b/node/core/pvf/src/prepare/pool.rs @@ -65,7 +65,7 @@ pub enum ToPool { worker: Worker, code: Arc>, artifact_path: PathBuf, - compilation_timeout: Duration, + preparation_timeout: Duration, }, } @@ -210,7 +210,7 @@ fn handle_to_pool( metrics.prepare_worker().on_begin_spawn(); mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed()); }, - ToPool::StartWork { worker, code, artifact_path, compilation_timeout } => { + ToPool::StartWork { worker, code, artifact_path, preparation_timeout } => { if let Some(data) = spawned.get_mut(worker) { if let 
Some(idle) = data.idle.take() { let preparation_timer = metrics.time_preparation(); @@ -221,7 +221,7 @@ fn handle_to_pool( code, cache_path.to_owned(), artifact_path, - compilation_timeout, + preparation_timeout, preparation_timer, ) .boxed(), @@ -269,11 +269,11 @@ async fn start_work_task( code: Arc>, cache_path: PathBuf, artifact_path: PathBuf, - compilation_timeout: Duration, + preparation_timeout: Duration, _preparation_timer: Option, ) -> PoolEvent { let outcome = - worker::start_work(idle, code, &cache_path, artifact_path, compilation_timeout).await; + worker::start_work(idle, code, &cache_path, artifact_path, preparation_timeout).await; PoolEvent::StartWork(worker, outcome) } diff --git a/node/core/pvf/src/prepare/queue.rs b/node/core/pvf/src/prepare/queue.rs index a77b88e00345..ae0757d80461 100644 --- a/node/core/pvf/src/prepare/queue.rs +++ b/node/core/pvf/src/prepare/queue.rs @@ -33,7 +33,7 @@ pub enum ToQueue { /// /// Note that it is incorrect to enqueue the same PVF again without first receiving the /// [`FromQueue`] response. - Enqueue { priority: Priority, pvf: Pvf, compilation_timeout: Duration }, + Enqueue { priority: Priority, pvf: Pvf, preparation_timeout: Duration }, } /// A response from queue. @@ -80,7 +80,7 @@ struct JobData { priority: Priority, pvf: Pvf, /// The timeout for the preparation job. 
- compilation_timeout: Duration, + preparation_timeout: Duration, worker: Option, } @@ -208,8 +208,8 @@ impl Queue { async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fatal> { match to_queue { - ToQueue::Enqueue { priority, pvf, compilation_timeout } => { - handle_enqueue(queue, priority, pvf, compilation_timeout).await?; + ToQueue::Enqueue { priority, pvf, preparation_timeout } => { + handle_enqueue(queue, priority, pvf, preparation_timeout).await?; }, } Ok(()) @@ -219,13 +219,13 @@ async fn handle_enqueue( queue: &mut Queue, priority: Priority, pvf: Pvf, - compilation_timeout: Duration, + preparation_timeout: Duration, ) -> Result<(), Fatal> { gum::debug!( target: LOG_TARGET, validation_code_hash = ?pvf.code_hash, ?priority, - ?compilation_timeout, + ?preparation_timeout, "PVF is enqueued for preparation.", ); queue.metrics.prepare_enqueued(); @@ -247,7 +247,7 @@ async fn handle_enqueue( return Ok(()) } - let job = queue.jobs.insert(JobData { priority, pvf, compilation_timeout, worker: None }); + let job = queue.jobs.insert(JobData { priority, pvf, preparation_timeout, worker: None }); queue.artifact_id_to_job.insert(artifact_id, job); if let Some(available) = find_idle_worker(queue) { @@ -439,7 +439,7 @@ async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal worker, code: job_data.pvf.code.clone(), artifact_path, - compilation_timeout: job_data.compilation_timeout, + preparation_timeout: job_data.preparation_timeout, }, ) .await?; @@ -494,7 +494,7 @@ pub fn start( #[cfg(test)] mod tests { use super::*; - use crate::{error::PrepareError, host::PRECHECK_COMPILATION_TIMEOUT}; + use crate::{error::PrepareError, host::PRECHECK_PREPARATION_TIMEOUT}; use assert_matches::assert_matches; use futures::{future::BoxFuture, FutureExt}; use slotmap::SlotMap; @@ -612,7 +612,7 @@ mod tests { test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1), - compilation_timeout: PRECHECK_COMPILATION_TIMEOUT, + 
preparation_timeout: PRECHECK_PREPARATION_TIMEOUT, }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); @@ -626,12 +626,12 @@ mod tests { #[async_std::test] async fn dont_spawn_over_soft_limit_unless_critical() { let mut test = Test::new(2, 3); - let compilation_timeout = PRECHECK_COMPILATION_TIMEOUT; + let preparation_timeout = PRECHECK_PREPARATION_TIMEOUT; let priority = Priority::Normal; - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(1), compilation_timeout }); - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(2), compilation_timeout }); - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(3), compilation_timeout }); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(1), preparation_timeout }); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(2), preparation_timeout }); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(3), preparation_timeout }); // Receive only two spawns. assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); @@ -655,7 +655,7 @@ mod tests { test.send_queue(ToQueue::Enqueue { priority: Priority::Critical, pvf: pvf(4), - compilation_timeout, + preparation_timeout, }); // 2 out of 2 are working, but there is a critical job incoming. 
That means that spawning @@ -666,12 +666,12 @@ mod tests { #[async_std::test] async fn cull_unwanted() { let mut test = Test::new(1, 2); - let compilation_timeout = PRECHECK_COMPILATION_TIMEOUT; + let preparation_timeout = PRECHECK_PREPARATION_TIMEOUT; test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1), - compilation_timeout, + preparation_timeout, }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); let w1 = test.workers.insert(()); @@ -682,7 +682,7 @@ mod tests { test.send_queue(ToQueue::Enqueue { priority: Priority::Critical, pvf: pvf(2), - compilation_timeout, + preparation_timeout, }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); @@ -701,10 +701,10 @@ mod tests { async fn worker_mass_die_out_doesnt_stall_queue() { let mut test = Test::new(2, 2); - let (priority, compilation_timeout) = (Priority::Normal, PRECHECK_COMPILATION_TIMEOUT); - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(1), compilation_timeout }); - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(2), compilation_timeout }); - test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(3), compilation_timeout }); + let (priority, preparation_timeout) = (Priority::Normal, PRECHECK_PREPARATION_TIMEOUT); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(1), preparation_timeout }); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(2), preparation_timeout }); + test.send_queue(ToQueue::Enqueue { priority, pvf: pvf(3), preparation_timeout }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); @@ -734,7 +734,7 @@ mod tests { test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1), - compilation_timeout: PRECHECK_COMPILATION_TIMEOUT, + preparation_timeout: PRECHECK_PREPARATION_TIMEOUT, }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); @@ -759,7 +759,7 @@ mod tests { 
test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1), - compilation_timeout: PRECHECK_COMPILATION_TIMEOUT, + preparation_timeout: PRECHECK_PREPARATION_TIMEOUT, }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn); diff --git a/node/core/pvf/src/prepare/worker.rs b/node/core/pvf/src/prepare/worker.rs index 77570b47360b..1cf512894740 100644 --- a/node/core/pvf/src/prepare/worker.rs +++ b/node/core/pvf/src/prepare/worker.rs @@ -65,7 +65,7 @@ pub async fn start_work( code: Arc>, cache_path: &Path, artifact_path: PathBuf, - compilation_timeout: Duration, + preparation_timeout: Duration, ) -> Outcome { let IdleWorker { mut stream, pid } = worker; @@ -100,7 +100,7 @@ pub async fn start_work( } let selected = - match async_std::future::timeout(compilation_timeout, framed_recv(&mut stream)).await { + match async_std::future::timeout(preparation_timeout, framed_recv(&mut stream)).await { Ok(Ok(response_bytes)) => { // Received bytes from worker within the time limit. // By convention we expect encoded `PrepareResult`. From 38231a271fbb4bd48c88277b31dd49a95651b1a3 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Oct 2022 13:48:20 -0400 Subject: [PATCH 2/6] Update implementor's guide with info about PVFs --- roadmap/implementers-guide/src/glossary.md | 28 +++++++++++ .../implementers-guide/src/pvf-prechecking.md | 47 +++++++++---------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/roadmap/implementers-guide/src/glossary.md b/roadmap/implementers-guide/src/glossary.md index a64c5bd00a50..c34feab63883 100644 --- a/roadmap/implementers-guide/src/glossary.md +++ b/roadmap/implementers-guide/src/glossary.md @@ -27,6 +27,8 @@ exactly one downward message queue. - PDK (Parachain Development Kit): A toolset that allows one to develop a parachain. Cumulus is a PDK. - Preimage: In our context, if `H(X) = Y` where `H` is a hash function and `Y` is the hash, then `X` is the hash preimage. 
- Proof-of-Validity (PoV): A stateless-client proof that a parachain candidate is valid, with respect to some validation function. +- PVF: Parachain Validation Function. The validation code that is run by + validators on parachains or parathreads. See the [PVF glossary][1]. - Relay Parent: A block in the relay chain, referred to in a context where work is being done in the context of the state at this block. - Router: The router module is a meta module that consists of three runtime modules responsible for routing messages between paras and the relay chain. The three separate runtime modules are: Dmp, Ump, Hrmp, each responsible for the respective part of message routing. - Runtime: The relay-chain state machine. @@ -40,6 +42,32 @@ exactly one downward message queue. - VMP: (Vertical Message Passing) A family of mechanisms that are responsible for message exchange between the relay chain and parachains. - XCMP (Cross-Chain Message Passing) A type of horizontal message passing (i.e. between parachains) that allows secure message passing directly between parachains and has minimal resource requirements from the relay chain, thus highly scalable. +## PVF + +The PVF functionality involves several processes which may be potentially +confusing: + +- **Prechecking:** This is the process of initially checking the PVF when it is + first added. We attempt *preparation* of the PVF and make sure it succeeds + within a given timeout. +- **Execution:** This actually executes the PVF. The node may not have the + artifact from prechecking, in which case this process also includes a + *preparation* job. The timeout for preparation here is more lenient than when + prechecking. +- **Preparation:** This is the process of preparing the WASM blob and includes + both *prevalidation* and *compilation*. As prevalidation is pretty minimal + right now, preparation mostly consists of compilation. 
Note that *prechecking* + just consists of preparation, whereas *execution* will also prepare the PVF if + the artifact is not already found. +- **Prevalidation:** Right now this just tries to deserialize the binary with + parity-wasm. It is a part of *preparation*. +- **Compilation:** This is the process of compiling a PVF from wasm code to + a machine code *artifact*. It is a part of *preparation*. +- **Artifact:** The compiled machine code for the wasm module. + +## See Also + Also of use is the [Substrate Glossary](https://substrate.dev/docs/en/knowledgebase/getting-started/glossary). [0]: https://wiki.polkadot.network/docs/learn-consensus +[1]: #pvf diff --git a/roadmap/implementers-guide/src/pvf-prechecking.md b/roadmap/implementers-guide/src/pvf-prechecking.md index 1dc7611c0cef..ba46ed3a9231 100644 --- a/roadmap/implementers-guide/src/pvf-prechecking.md +++ b/roadmap/implementers-guide/src/pvf-prechecking.md @@ -1,32 +1,14 @@ # PVF Pre-checking Overview -> ⚠️ This discusses a mechanism that is currently under-development. Follow the progress under [#3211]. - -## Terms - -This functionality involves several processes which may be potentially -confusing: - -- **Prechecking:** This is the process of initially checking the PVF when it is - first added. We attempt *preparation* of the PVF and make sure it succeeds - within a given timeout. -- **Execution:** This actually executes the PVF. The node may not have the - artifact from prechecking, in which case this process also includes a - *preparation* job. The timeout for preparation here is more lenient than when - prechecking. -- **Preparation:** This is the process of preparing the WASM blob and includes - both *prevalidation* and *compilation*. As prevalidation is pretty minimal - right now, preparation mostly consists of compilation. Note that *prechecking* - just consists of preparation, whereas *execution* will also prepare the PVF if - the artifact is not already found. 
-- **Prevalidation:** Right now this just tries to deserialize the binary with - parity-wasm. It is a part of *preparation*. -- **Compilation:** This is the process of compiling a PVF from wasm code to - machine code. It is a part of *preparation*. +> ⚠️ This discusses a mechanism that is currently under-development. Follow the progress under [#3211][3211]. + +For an explanation of the terms used in this document, please see the [glossary]. ## Motivation -Parachains' and parathreads' validation function is described by a wasm module that we refer to as a PVF. Since it's a wasm module the typical way of executing it is to compile it to machine code. Typically an optimizing compiler consists of algorithms that are able to optimize the resulting machine code heavily. However, while those algorithms perform quite well for a typical wasm code produced by standard toolchains (e.g. rustc/LLVM), those algorithms can be abused to consume a lot of resources. Moreover, since those algorithms are rather complex there is a lot of room for a bug that can crash the compiler. +Parachains' and parathreads' validation function is described by a wasm module that we refer to as a PVF. Since a PVF is a wasm module the typical way of executing it is to compile it to machine code. + +Typically an optimizing compiler consists of algorithms that are able to optimize the resulting machine code heavily. However, while those algorithms perform quite well for a typical wasm code produced by standard toolchains (e.g. rustc/LLVM), those algorithms can be abused to consume a lot of resources. Moreover, since those algorithms are rather complex there is a lot of room for a bug that can crash the compiler. If compilation of a Parachain Validation Function (PVF) takes too long or uses too much memory, this can leave a node in limbo as to whether a candidate of that parachain is valid or not. @@ -66,7 +48,22 @@ The logic described above is implemented by the [paras] module. 
On the node-side, there is a PVF pre-checking [subsystem][pvf-prechecker-subsystem] that scans the chain for new PVFs via using [runtime APIs][pvf-runtime-api]. Upon finding a new PVF, the subsystem will initiate a PVF pre-checking request and wait for the result. Whenever the result is obtained, the subsystem will use the [runtime API][pvf-runtime-api] to submit a vote for the PVF. The vote is an unsigned transaction. The vote will be distributed via the gossip similarly to a normal transaction. Eventually a block producer will include the vote into the block where it will be handled by the [runtime][paras]. -[#3211]: /~https://github.com/paritytech/polkadot/issues/3211 +## Summary + +Parachains' and parathreads' validation function is described by a wasm module that we refer to as a PVF. + +In order to make the PVF usable for candidate validation it has to be registered on-chain. + +As part of the registration process, it has to go through pre-checking. + +Pre-checking is a game of attempting preparation and reporting the results back on-chain. + +We define preparation as a process that validates the consistency of the wasm binary (aka prevalidation) and compiles the wasm module into machine code (referred to as an artifact). + +Besides pre-checking, preparation can also be triggered by execution, since a compiled artifact is needed for the execution. 
+ +[glossary]: glossary.md#pvf +[3211]: /~https://github.com/paritytech/polkadot/issues/3211 [paras]: runtime/paras.md [pvf-runtime-api]: runtime-api/pvf-prechecking.md [pvf-prechecker-subsystem]: node/utility/pvf-prechecker.md From 2db8522a4f0ea5c43415fb7980ee6e89f5f6ae54 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Oct 2022 13:52:03 -0400 Subject: [PATCH 3/6] Make glossary a bit easier to read --- roadmap/implementers-guide/src/glossary.md | 75 +++++++++++----------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/roadmap/implementers-guide/src/glossary.md b/roadmap/implementers-guide/src/glossary.md index c34feab63883..7d8eba3b6b0f 100644 --- a/roadmap/implementers-guide/src/glossary.md +++ b/roadmap/implementers-guide/src/glossary.md @@ -2,45 +2,44 @@ Here you can find definitions of a bunch of jargon, usually specific to the Polkadot project. -- BABE: (Blind Assignment for Blockchain Extension). The algorithm validators use to safely extend the Relay Chain. See [the Polkadot wiki][0] for more information. -- Backable Candidate: A Parachain Candidate which is backed by a majority of validators assigned to a given parachain. -- Backed Candidate: A Backable Candidate noted in a relay-chain block -- Backing: A set of statements proving that a Parachain Candidate is backable. -- Collator: A node who generates Proofs-of-Validity (PoV) for blocks of a specific parachain. -- DMP: (Downward Message Passing). Message passing from the relay-chain to a parachain. Also there is a runtime parachains module with the same name. -- DMQ: (Downward Message Queue). A message queue for messages from the relay-chain down to a parachain. A parachain has +- **BABE:** (Blind Assignment for Blockchain Extension). The algorithm validators use to safely extend the Relay Chain. See [the Polkadot wiki][0] for more information. +- **Backable Candidate:** A Parachain Candidate which is backed by a majority of validators assigned to a given parachain. 
+- **Backed Candidate:** A Backable Candidate noted in a relay-chain block +- **Backing:** A set of statements proving that a Parachain Candidate is backable. +- **Collator:** A node who generates Proofs-of-Validity (PoV) for blocks of a specific parachain. +- **DMP:** (Downward Message Passing). Message passing from the relay-chain to a parachain. Also there is a runtime parachains module with the same name. +- **DMQ:** (Downward Message Queue). A message queue for messages from the relay-chain down to a parachain. A parachain has exactly one downward message queue. -- Extrinsic: An element of a relay-chain block which triggers a specific entry-point of a runtime module with given arguments. -- GRANDPA: (Ghost-based Recursive ANcestor Deriving Prefix Agreement). The algorithm validators use to guarantee finality of the Relay Chain. -- HRMP: (Horizontally Relay-routed Message Passing). A mechanism for message passing between parachains (hence horizontal) that leverages the relay-chain storage. Predates XCMP. Also there is a runtime parachains module with the same name. -- Inclusion Pipeline: The set of steps taken to carry a Parachain Candidate from authoring, to backing, to availability and full inclusion in an active fork of its parachain. -- Module: A component of the Runtime logic, encapsulating storage, routines, and entry-points. -- Module Entry Point: A recipient of new information presented to the Runtime. This may trigger routines. -- Module Routine: A piece of code executed within a module by block initialization, closing, or upon an entry point being triggered. This may execute computation, and read or write storage. -- MQC: (Message Queue Chain). A cryptographic data structure that resembles an append-only linked list which doesn't store original values but only their hashes. The whole structure is described by a single hash, referred as a "head". 
When a value is appended, it's contents hashed with the previous head creating a hash that becomes a new head. -- Node: A participant in the Polkadot network, who follows the protocols of communication and connection to other nodes. Nodes form a peer-to-peer network topology without a central authority. -- Parachain Candidate, or Candidate: A proposed block for inclusion into a parachain. -- Parablock: A block in a parachain. -- Parachain: A constituent chain secured by the Relay Chain's validators. -- Parachain Validators: A subset of validators assigned during a period of time to back candidates for a specific parachain -- Parathread: A parachain which is scheduled on a pay-as-you-go basis. -- PDK (Parachain Development Kit): A toolset that allows one to develop a parachain. Cumulus is a PDK. -- Preimage: In our context, if `H(X) = Y` where `H` is a hash function and `Y` is the hash, then `X` is the hash preimage. -- Proof-of-Validity (PoV): A stateless-client proof that a parachain candidate is valid, with respect to some validation function. -- PVF: Parachain Validation Function. The validation code that is run by - validators on parachains or parathreads. See the [PVF glossary][1]. -- Relay Parent: A block in the relay chain, referred to in a context where work is being done in the context of the state at this block. -- Router: The router module is a meta module that consists of three runtime modules responsible for routing messages between paras and the relay chain. The three separate runtime modules are: Dmp, Ump, Hrmp, each responsible for the respective part of message routing. -- Runtime: The relay-chain state machine. -- Runtime Module: See Module. -- Runtime API: A means for the node-side behavior to access structured information based on the state of a fork of the blockchain. -- Secondary Checker: A validator who has been randomly selected to perform secondary approval checks on a parablock which is pending approval. 
-- Subsystem: A long-running task which is responsible for carrying out a particular category of work. -- UMP: (Upward Message Passing) A vertical message passing mechanism from a parachain to the relay chain. -- Validator: Specially-selected node in the network who is responsible for validating parachain blocks and issuing attestations about their validity. -- Validation Function: A piece of Wasm code that describes the state-transition function of a parachain. -- VMP: (Vertical Message Passing) A family of mechanisms that are responsible for message exchange between the relay chain and parachains. -- XCMP (Cross-Chain Message Passing) A type of horizontal message passing (i.e. between parachains) that allows secure message passing directly between parachains and has minimal resource requirements from the relay chain, thus highly scalable. +- **Extrinsic:** An element of a relay-chain block which triggers a specific entry-point of a runtime module with given arguments. +- **GRANDPA:** (Ghost-based Recursive ANcestor Deriving Prefix Agreement). The algorithm validators use to guarantee finality of the Relay Chain. +- **HRMP:** (Horizontally Relay-routed Message Passing). A mechanism for message passing between parachains (hence horizontal) that leverages the relay-chain storage. Predates XCMP. Also there is a runtime parachains module with the same name. +- **Inclusion Pipeline:** The set of steps taken to carry a Parachain Candidate from authoring, to backing, to availability and full inclusion in an active fork of its parachain. +- **Module:** A component of the Runtime logic, encapsulating storage, routines, and entry-points. +- **Module Entry Point:** A recipient of new information presented to the Runtime. This may trigger routines. +- **Module Routine:** A piece of code executed within a module by block initialization, closing, or upon an entry point being triggered. This may execute computation, and read or write storage. +- **MQC:** (Message Queue Chain). 
A cryptographic data structure that resembles an append-only linked list which doesn't store original values but only their hashes. The whole structure is described by a single hash, referred to as a "head". When a value is appended, its contents are hashed with the previous head creating a hash that becomes a new head. +- **Node:** A participant in the Polkadot network, who follows the protocols of communication and connection to other nodes. Nodes form a peer-to-peer network topology without a central authority. +- **Parachain Candidate, or Candidate:** A proposed block for inclusion into a parachain. +- **Parablock:** A block in a parachain. +- **Parachain:** A constituent chain secured by the Relay Chain's validators. +- **Parachain Validators:** A subset of validators assigned during a period of time to back candidates for a specific parachain. +- **Parathread:** A parachain which is scheduled on a pay-as-you-go basis. +- **PDK (Parachain Development Kit):** A toolset that allows one to develop a parachain. Cumulus is a PDK. +- **Preimage:** In our context, if `H(X) = Y` where `H` is a hash function and `Y` is the hash, then `X` is the hash preimage. +- **Proof-of-Validity (PoV):** A stateless-client proof that a parachain candidate is valid, with respect to some validation function. +- **PVF:** Parachain Validation Function. The validation code that is run by validators on parachains or parathreads. See the [PVF glossary][1]. +- **Relay Parent:** A block in the relay chain, referred to in a context where work is being done in the context of the state at this block. +- **Router:** The router module is a meta module that consists of three runtime modules responsible for routing messages between paras and the relay chain. The three separate runtime modules are: Dmp, Ump, Hrmp, each responsible for the respective part of message routing. +- **Runtime:** The relay-chain state machine. +- **Runtime Module:** See Module. 
+- **Runtime API:** A means for the node-side behavior to access structured information based on the state of a fork of the blockchain. +- **Secondary Checker:** A validator who has been randomly selected to perform secondary approval checks on a parablock which is pending approval. +- **Subsystem:** A long-running task which is responsible for carrying out a particular category of work. +- **UMP:** (Upward Message Passing) A vertical message passing mechanism from a parachain to the relay chain. +- **Validator:** Specially-selected node in the network who is responsible for validating parachain blocks and issuing attestations about their validity. +- **Validation Function:** A piece of Wasm code that describes the state-transition function of a parachain. +- **VMP:** (Vertical Message Passing) A family of mechanisms that are responsible for message exchange between the relay chain and parachains. +- **XCMP:** (Cross-Chain Message Passing) A type of horizontal message passing (i.e. between parachains) that allows secure message passing directly between parachains and has minimal resource requirements from the relay chain, thus highly scalable. ## PVF From 5e56bdf02d5ed1918982ab4b395fdb4cef529067 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Thu, 13 Oct 2022 14:08:28 -0400 Subject: [PATCH 4/6] Add a note to LENIENT_PREPARATION_TIMEOUT --- node/core/pvf/src/host.rs | 3 ++- roadmap/implementers-guide/src/pvf-prechecking.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 811cee0e032e..69f2e07b56cc 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -44,7 +44,8 @@ use std::{ pub const PRECHECK_PREPARATION_TIMEOUT: Duration = Duration::from_secs(60); /// For execution and heads-up requests, the time period after which the preparation worker is -/// considered unresponsive and will be killed. +/// considered unresponsive and will be killed. 
More lenient than the timeout for prechecking to +/// prevent honest validators from timing out on valid PVFs. // NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric. pub const LENIENT_PREPARATION_TIMEOUT: Duration = Duration::from_secs(360); diff --git a/roadmap/implementers-guide/src/pvf-prechecking.md b/roadmap/implementers-guide/src/pvf-prechecking.md index ba46ed3a9231..b08e239e80b8 100644 --- a/roadmap/implementers-guide/src/pvf-prechecking.md +++ b/roadmap/implementers-guide/src/pvf-prechecking.md @@ -48,7 +48,7 @@ The logic described above is implemented by the [paras] module. On the node-side, there is a PVF pre-checking [subsystem][pvf-prechecker-subsystem] that scans the chain for new PVFs via using [runtime APIs][pvf-runtime-api]. Upon finding a new PVF, the subsystem will initiate a PVF pre-checking request and wait for the result. Whenever the result is obtained, the subsystem will use the [runtime API][pvf-runtime-api] to submit a vote for the PVF. The vote is an unsigned transaction. The vote will be distributed via the gossip similarly to a normal transaction. Eventually a block producer will include the vote into the block where it will be handled by the [runtime][paras]. -## Summary +## Pre-checking Summary Parachains' and parathreads' validation function is described by a wasm module that we refer to as a PVF. 
From 3bcf8d247e00cd93691296269971f3c7c59fec1e Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 14 Oct 2022 12:09:18 -0400 Subject: [PATCH 5/6] Remove PVF-specific section from glossary --- roadmap/implementers-guide/src/glossary.md | 27 +++---------------- .../implementers-guide/src/pvf-prechecking.md | 9 ++----- 2 files changed, 5 insertions(+), 31 deletions(-) diff --git a/roadmap/implementers-guide/src/glossary.md b/roadmap/implementers-guide/src/glossary.md index 7d8eba3b6b0f..8612d8834cb8 100644 --- a/roadmap/implementers-guide/src/glossary.md +++ b/roadmap/implementers-guide/src/glossary.md @@ -27,7 +27,9 @@ exactly one downward message queue. - **PDK (Parachain Development Kit):** A toolset that allows one to develop a parachain. Cumulus is a PDK. - **Preimage:** In our context, if `H(X) = Y` where `H` is a hash function and `Y` is the hash, then `X` is the hash preimage. - **Proof-of-Validity (PoV):** A stateless-client proof that a parachain candidate is valid, with respect to some validation function. -- **PVF:** Parachain Validation Function. The validation code that is run by validators on parachains or parathreads. See the [PVF glossary][1]. +- **PVF:** Parachain Validation Function. The validation code that is run by validators on parachains or parathreads. +- **PVF Prechecking:** This is the process of initially checking the PVF when it is first added. We attempt preparation of the PVF and make sure it succeeds within a given timeout. +- **PVF Preparation:** This is the process of preparing the WASM blob and includes both prevalidation and compilation. As prevalidation is pretty minimal right now, preparation mostly consists of compilation. - **Relay Parent:** A block in the relay chain, referred to in a context where work is being done in the context of the state at this block. - **Router:** The router module is a meta module that consists of three runtime modules responsible for routing messages between paras and the relay chain. 
The three separate runtime modules are: Dmp, Ump, Hrmp, each responsible for the respective part of message routing. - **Runtime:** The relay-chain state machine. @@ -41,29 +43,6 @@ exactly one downward message queue. - **VMP:** (Vertical Message Passing) A family of mechanisms that are responsible for message exchange between the relay chain and parachains. - **XCMP:** (Cross-Chain Message Passing) A type of horizontal message passing (i.e. between parachains) that allows secure message passing directly between parachains and has minimal resource requirements from the relay chain, thus highly scalable. -## PVF - -The PVF functionality involves several processes which may be potentially -confusing: - -- **Prechecking:** This is the process of initially checking the PVF when it is - first added. We attempt *preparation* of the PVF and make sure it succeeds - within a given timeout. -- **Execution:** This actually executes the PVF. The node may not have the - artifact from prechecking, in which case this process also includes a - *preparation* job. The timeout for preparation here is more lenient than when - prechecking. -- **Preparation:** This is the process of preparing the WASM blob and includes - both *prevalidation* and *compilation*. As prevalidation is pretty minimal - right now, preparation mostly consists of compilation. Note that *prechecking* - just consists of preparation, whereas *execution* will also prepare the PVF if - the artifact is not already found. -- **Prevalidation:** Right now this just tries to deserialize the binary with - parity-wasm. It is a part of *preparation*. -- **Compilation:** This is the process of compiling a PVF from wasm code to - a machine code *artifact*. It is a part of *preparation*. -- **Artifact:** The compiled machine code for the wasm module. - ## See Also Also of use is the [Substrate Glossary](https://substrate.dev/docs/en/knowledgebase/getting-started/glossary). 
diff --git a/roadmap/implementers-guide/src/pvf-prechecking.md b/roadmap/implementers-guide/src/pvf-prechecking.md index b08e239e80b8..4dce61d2a83b 100644 --- a/roadmap/implementers-guide/src/pvf-prechecking.md +++ b/roadmap/implementers-guide/src/pvf-prechecking.md @@ -2,8 +2,6 @@ > ⚠️ This discusses a mechanism that is currently under-development. Follow the progress under [#3211][3211]. -For an explanation of the terms used in this document, please see the [glossary]. - ## Motivation Parachains' and parathreads' validation function is described by a wasm module that we refer to as a PVF. Since a PVF is a wasm module the typical way of executing it is to compile it to machine code. @@ -54,15 +52,12 @@ Parachains' and parathreads' validation function is described by a wasm module t In order to make the PVF usable for candidate validation it has to be registered on-chain. -As part of the registration process, it has to go through pre-checking. - -Pre-checking is a game of attempting preparation and reporting the results back on-chain. +As part of the registration process, it has to go through pre-checking. Pre-checking is a game of attempting preparation and reporting the results back on-chain. We define preparation as a process that: validates the consistency of the wasm binary (aka prevalidation) and the compilation of the wasm module into machine code (refered to as artifact). -Besides pre-checking, preparation can also be triggered by execution, since a compiled artifact is needed for the execution. +Besides pre-checking, preparation can also be triggered by execution, since a compiled artifact is needed for the execution. If an artifact already exists, execution will skip preparation. If it does do preparation, execution uses a more lenient timeout than pre-checking, to avoid the situation where honest validators fail on valid, pre-checked PVFs. 
-[glossary]: glossary.md#pvf [3211]: /~https://github.com/paritytech/polkadot/issues/3211 [paras]: runtime/paras.md [pvf-runtime-api]: runtime-api/pvf-prechecking.md From 7c8e46650aaf8c4d435ab7365f96e903b789ff18 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Mon, 31 Oct 2022 22:13:54 +0100 Subject: [PATCH 6/6] Fix some typos --- node/core/pvf/src/artifacts.rs | 4 ++-- node/core/pvf/src/pvf.rs | 2 +- node/primitives/src/lib.rs | 6 ++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/node/core/pvf/src/artifacts.rs b/node/core/pvf/src/artifacts.rs index 49d1be75fed4..32f487cfc062 100644 --- a/node/core/pvf/src/artifacts.rs +++ b/node/core/pvf/src/artifacts.rs @@ -96,7 +96,7 @@ pub enum ArtifactState { /// That means that the artifact should be accessible through the path obtained by the artifact /// id (unless, it was removed externally). Prepared { - /// The time when the artifact was the last time needed. + /// The time when the artifact was last needed. /// /// This is updated when we get the heads up for this artifact or when we just discover /// this file. @@ -120,7 +120,7 @@ impl Artifacts { /// /// The recognized artifacts will be filled in the table and unrecognized will be removed. pub async fn new(cache_path: &Path) -> Self { - // Make sure that the cache path directory and all it's parents are created. + // Make sure that the cache path directory and all its parents are created. // First delete the entire cache. Nodes are long-running so this should populate shortly. 
let _ = async_std::fs::remove_dir_all(cache_path).await; let _ = async_std::fs::create_dir_all(cache_path).await; diff --git a/node/core/pvf/src/pvf.rs b/node/core/pvf/src/pvf.rs index 901cc1c70d6e..d06968a13d43 100644 --- a/node/core/pvf/src/pvf.rs +++ b/node/core/pvf/src/pvf.rs @@ -19,7 +19,7 @@ use polkadot_parachain::primitives::ValidationCodeHash; use sp_core::blake2_256; use std::{fmt, sync::Arc}; -/// A struct that carries code of a parachain validation function and it's hash. +/// A struct that carries code of a parachain validation function and its hash. /// /// Should be cheap to clone. #[derive(Clone)] diff --git a/node/primitives/src/lib.rs b/node/primitives/src/lib.rs index 4551ce9855e3..e75181b900e9 100644 --- a/node/primitives/src/lib.rs +++ b/node/primitives/src/lib.rs @@ -71,7 +71,7 @@ pub const BACKING_EXECUTION_TIMEOUT: Duration = Duration::from_secs(2); /// /// This is deliberately much longer than the backing execution timeout to /// ensure that in the absence of extremely large disparities between hardware, -/// blocks that pass backing are considerd executable by approval checkers or +/// blocks that pass backing are considered executable by approval checkers or /// dispute participants. pub const APPROVAL_EXECUTION_TIMEOUT: Duration = Duration::from_secs(12); @@ -90,9 +90,7 @@ pub const MAX_FINALITY_LAG: u32 = 500; pub struct SessionWindowSize(SessionIndex); #[macro_export] -/// Create a new checked `SessionWindowSize` -/// -/// which cannot be 0. +/// Create a new checked `SessionWindowSize` which cannot be 0. macro_rules! new_session_window_size { (0) => { compile_error!("Must be non zero");