Skip to content

Commit

Permalink
Merge pull request #11051 from neondatabase/vlad/release-8005-and-che…
Browse files Browse the repository at this point in the history
…rry-picks

Storage release 2025-02-28
  • Loading branch information
VladLazar authored Feb 28, 2025
2 parents 66d2592 + 0263c92 commit a1e67cf
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 22 deletions.
3 changes: 3 additions & 0 deletions control_plane/src/local_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ pub struct NeonStorageControllerConf {

#[serde(with = "humantime_serde")]
pub long_reconcile_threshold: Option<Duration>,

pub load_safekeepers: bool,
}

impl NeonStorageControllerConf {
Expand All @@ -188,6 +190,7 @@ impl Default for NeonStorageControllerConf {
max_secondary_lag_bytes: None,
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
long_reconcile_threshold: None,
load_safekeepers: true,
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions control_plane/src/storage_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,10 @@ impl StorageController {
args.push("--start-as-candidate".to_string());
}

if self.config.load_safekeepers {
args.push("--load-safekeepers".to_string());
}

if let Some(private_key) = &self.private_key {
let claims = Claims::new(None, Scope::PageServerApi);
let jwt_token =
Expand Down
20 changes: 7 additions & 13 deletions pageserver/src/tenant/tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -473,21 +473,15 @@ async fn wait_for_active_tenant(
}

let mut update_rx = tenant.subscribe_for_state_updates();
loop {
tokio::select! {
_ = cancel.cancelled() => return ControlFlow::Break(()),
result = update_rx.changed() => if result.is_err() {
tokio::select! {
result = update_rx.wait_for(|s| s == &TenantState::Active) => {
if result.is_err() {
return ControlFlow::Break(());
}
}

match &*update_rx.borrow() {
TenantState::Active => {
debug!("Tenant state changed to active, continuing the task loop");
return ControlFlow::Continue(());
}
state => debug!("Not running the task loop, tenant is not active: {state:?}"),
}
debug!("Tenant state changed to active, continuing the task loop");
ControlFlow::Continue(())
},
_ = cancel.cancelled() => ControlFlow::Break(()),
}
}

Expand Down
5 changes: 5 additions & 0 deletions storage_controller/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ struct Cli {
// Flag to use https for requests to pageserver API.
#[arg(long, default_value = "false")]
use_https_pageserver_api: bool,

/// Whether to load safekeepers from the database and heartbeat them
#[arg(long, default_value = "false")]
load_safekeepers: bool,
}

enum StrictMode {
Expand Down Expand Up @@ -350,6 +354,7 @@ async fn async_main() -> anyhow::Result<()> {
start_as_candidate: args.start_as_candidate,
http_service_port: args.listen.port() as i32,
use_https_pageserver_api: args.use_https_pageserver_api,
load_safekeepers: args.load_safekeepers,
};

// Validate that we can connect to the database
Expand Down
26 changes: 17 additions & 9 deletions storage_controller/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ pub struct Config {
pub long_reconcile_threshold: Duration,

pub use_https_pageserver_api: bool,

pub load_safekeepers: bool,
}

impl From<DatabaseError> for ApiError {
Expand Down Expand Up @@ -1405,15 +1407,20 @@ impl Service {
.set(nodes.len() as i64);

tracing::info!("Loading safekeepers from database...");
let safekeepers = persistence
.list_safekeepers()
.await?
.into_iter()
.map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new()))
.collect::<Vec<_>>();
let safekeepers = if config.load_safekeepers {
persistence
.list_safekeepers()
.await?
.into_iter()
.map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new()))
.collect::<Vec<_>>()
} else {
tracing::info!("Skipping safekeeper loading");
Default::default()
};

let safekeepers: HashMap<NodeId, Safekeeper> =
safekeepers.into_iter().map(|n| (n.get_id(), n)).collect();
tracing::info!("Loaded {} safekeepers from database.", safekeepers.len());

tracing::info!("Loading shards from database...");
let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?;
Expand Down Expand Up @@ -8022,7 +8029,8 @@ impl Service {
) -> Result<(), DatabaseError> {
let node_id = NodeId(record.id as u64);
self.persistence.safekeeper_upsert(record.clone()).await?;
{

if self.config.load_safekeepers {
let mut locked = self.inner.write().unwrap();
let mut safekeepers = (*locked.safekeepers).clone();
match safekeepers.entry(node_id) {
Expand Down Expand Up @@ -8054,7 +8062,7 @@ impl Service {
.await?;
let node_id = NodeId(id as u64);
// After the change has been persisted successfully, update the in-memory state
{
if self.config.load_safekeepers {
let mut locked = self.inner.write().unwrap();
let mut safekeepers = (*locked.safekeepers).clone();
let sk = safekeepers
Expand Down
7 changes: 7 additions & 0 deletions test_runner/fixtures/neon_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,13 @@ def __init__(self, config: NeonEnvBuilder):
if self.storage_controller_config is not None:
cfg["storage_controller"] = self.storage_controller_config

# Disable new storcon flag in compat tests
if config.test_may_use_compatibility_snapshot_binaries:
if "storage_controller" in cfg:
cfg["storage_controller"]["load_safekeepers"] = False
else:
cfg["storage_controller"] = {"load_safekeepers": False}

# Create config for pageserver
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
Expand Down

1 comment on commit a1e67cf

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7040 tests run: 6707 passed, 0 failed, 333 skipped (full report)


Test coverage report is not available

The comment gets automatically updated with the latest test results
a1e67cf at 2025-02-28T20:25:49.301Z :recycle:

Please sign in to comment.