From 5f24fae2a489ac215255a99ec8221b67dad3418f Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Thu, 28 Sep 2023 08:53:26 +0200 Subject: [PATCH 01/14] vtc: Polish c{97..99} tests --- bin/varnishtest/tests/c00097.vtc | 2 +- bin/varnishtest/tests/c00098.vtc | 26 +++++++++++++------------- bin/varnishtest/tests/c00099.vtc | 26 +++++++++++++------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/bin/varnishtest/tests/c00097.vtc b/bin/varnishtest/tests/c00097.vtc index afc77fbd91f..796c6481cc6 100644 --- a/bin/varnishtest/tests/c00097.vtc +++ b/bin/varnishtest/tests/c00097.vtc @@ -53,7 +53,7 @@ client c4 { } -start # Wait until c2-c4 are on the waitinglist -delay 1 +varnish v1 -vsl_catchup varnish v1 -expect busy_sleep == 3 # Open up the response headers from s1, and as a result HSH_Unbusy diff --git a/bin/varnishtest/tests/c00098.vtc b/bin/varnishtest/tests/c00098.vtc index 5b34d3a75ad..14824527521 100644 --- a/bin/varnishtest/tests/c00098.vtc +++ b/bin/varnishtest/tests/c00098.vtc @@ -70,17 +70,17 @@ server s6 { varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_exponent=2" -arg "-p debug=+syncvsl" -arg "-p debug=+waitinglist" -vcl+backend { sub vcl_backend_fetch { - if (bereq.http.client == "1") { + if (bereq.http.user-agent == "c1") { set bereq.backend = s1; - } else if (bereq.http.client == "2") { + } else if (bereq.http.user-agent == "c2") { set bereq.backend = s2; - } else if (bereq.http.client == "3") { + } else if (bereq.http.user-agent == "c3") { set bereq.backend = s3; - } else if (bereq.http.client == "4") { + } else if (bereq.http.user-agent == "c4") { set bereq.backend = s4; - } else if (bereq.http.client == "5") { + } else if (bereq.http.user-agent == "c5") { set bereq.backend = s5; - } else if (bereq.http.client == "6") { + } else if (bereq.http.user-agent == "c6") { set bereq.backend = s6; } } @@ -90,7 +90,7 @@ varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_e } -start client c1 { - txreq -url /hfp -hdr "Client: 1" + txreq rxresp } -start @@ -98,32 +98,32 @@ client c1 { barrier b1 sync client c2 { - txreq -url /hfp -hdr "Client: 2" + txreq rxresp } -start client c3 { - txreq -url /hfp -hdr "Client: 3" + txreq rxresp } -start client c4 { - txreq -url /hfp -hdr "Client: 4" + txreq rxresp } -start client c5 { - txreq -url /hfp -hdr "Client: 5" + txreq rxresp } -start client c6 { - txreq -url /hfp -hdr "Client: 6" + txreq rxresp } -start # Wait until c2-c6 are on the waitinglist -delay 1 +varnish v1 -vsl_catchup varnish v1 -expect busy_sleep == 5 # Open up the response headers from s1, and as a result HSH_Unbusy diff --git a/bin/varnishtest/tests/c00099.vtc b/bin/varnishtest/tests/c00099.vtc index 4bbd904a021..9ee17759756 100644 --- a/bin/varnishtest/tests/c00099.vtc +++ b/bin/varnishtest/tests/c00099.vtc @@ -70,17 +70,17 @@ server s6 { varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_exponent=2" -arg "-p debug=+syncvsl" -arg "-p debug=+waitinglist" -vcl+backend { sub vcl_backend_fetch { - if (bereq.http.client == "1") { + if (bereq.http.user-agent == "c1") { set bereq.backend = s1; - } else if (bereq.http.client == "2") { + } else if (bereq.http.user-agent == "c2") { set bereq.backend = s2; - } else if (bereq.http.client == "3") { + } else if (bereq.http.user-agent == "c3") { set bereq.backend = s3; - } else if (bereq.http.client == "4") { + } else if (bereq.http.user-agent == "c4") { set bereq.backend = s4; - } else if (bereq.http.client == "5") { + } else if 
(bereq.http.user-agent == "c5") { set bereq.backend = s5; - } else if (bereq.http.client == "6") { + } else if (bereq.http.user-agent == "c6") { set bereq.backend = s6; } } @@ -90,7 +90,7 @@ varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_e } -start client c1 { - txreq -url /hfm -hdr "Client: 1" + txreq rxresp } -start @@ -98,32 +98,32 @@ client c1 { barrier b1 sync client c2 { - txreq -url /hfm -hdr "Client: 2" + txreq rxresp } -start client c3 { - txreq -url /hfm -hdr "Client: 3" + txreq rxresp } -start client c4 { - txreq -url /hfm -hdr "Client: 4" + txreq rxresp } -start client c5 { - txreq -url /hfm -hdr "Client: 5" + txreq rxresp } -start client c6 { - txreq -url /hfm -hdr "Client: 6" + txreq rxresp } -start # Wait until c2-c6 are on the waitinglist -delay 1 +varnish v1 -vsl_catchup varnish v1 -expect busy_sleep == 5 # Open up the response headers from s1, and as a result HSH_Unbusy From da96634e406c526a6760a9d045f2b235e4bc1be8 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Mon, 2 Oct 2023 13:25:29 +0200 Subject: [PATCH 02/14] hash: Retire unused HSH_CONTINUE enum --- bin/varnishd/hash/hash_slinger.h | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/varnishd/hash/hash_slinger.h b/bin/varnishd/hash/hash_slinger.h index a1a9c0e8214..c7314754460 100644 --- a/bin/varnishd/hash/hash_slinger.h +++ b/bin/varnishd/hash/hash_slinger.h @@ -52,7 +52,6 @@ struct hash_slinger { }; enum lookup_e { - HSH_CONTINUE, HSH_MISS, HSH_BUSY, HSH_HIT, From acfab776f11b7033ff1b1d0867c5333a28c533f8 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Mon, 2 Oct 2023 13:35:49 +0200 Subject: [PATCH 03/14] hash: Move enum lookup_e to cache_objhead.h It has nothing to do with the hash lookup implementation, this is the outcome of a cache lookup. 
--- bin/varnishd/cache/cache_fetch.c | 1 - bin/varnishd/cache/cache_objhead.h | 9 +++++++++ bin/varnishd/cache/cache_req_body.c | 1 - bin/varnishd/cache/cache_req_fsm.c | 1 - bin/varnishd/hash/hash_slinger.h | 9 --------- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c index 027b617f571..55c14f1d446 100644 --- a/bin/varnishd/cache/cache_fetch.c +++ b/bin/varnishd/cache/cache_fetch.c @@ -34,7 +34,6 @@ #include "cache_varnishd.h" #include "cache_filter.h" #include "cache_objhead.h" -#include "hash/hash_slinger.h" #include "storage/storage.h" #include "vcl.h" #include "vtim.h" diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h index bc1782379e3..58b53650294 100644 --- a/bin/varnishd/cache/cache_objhead.h +++ b/bin/varnishd/cache/cache_objhead.h @@ -56,6 +56,15 @@ struct objhead { #define hoh_head _u.n.u_n_hoh_head }; +enum lookup_e { + HSH_MISS, + HSH_HITMISS, + HSH_HITPASS, + HSH_HIT, + HSH_GRACE, + HSH_BUSY, +}; + void HSH_Fail(struct objcore *); void HSH_Kill(struct objcore *); void HSH_Insert(struct worker *, const void *hash, struct objcore *, diff --git a/bin/varnishd/cache/cache_req_body.c b/bin/varnishd/cache/cache_req_body.c index bca8717656f..3cbd627348f 100644 --- a/bin/varnishd/cache/cache_req_body.c +++ b/bin/varnishd/cache/cache_req_body.c @@ -41,7 +41,6 @@ #include "vtim.h" #include "storage/storage.h" -#include "hash/hash_slinger.h" /*---------------------------------------------------------------------- * Pull the req.body in via/into a objcore diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index 789cea5408a..bb1f6951dd3 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -46,7 +46,6 @@ #include "cache_transport.h" #include "vcc_interface.h" -#include "hash/hash_slinger.h" #include "http1/cache_http1.h" #include "storage/storage.h" #include "vcl.h" diff --git a/bin/varnishd/hash/hash_slinger.h b/bin/varnishd/hash/hash_slinger.h index c7314754460..1f0e980a7cb 100644 --- a/bin/varnishd/hash/hash_slinger.h +++ b/bin/varnishd/hash/hash_slinger.h @@ -51,15 +51,6 @@ struct hash_slinger { hash_deref_f *deref; }; -enum lookup_e { - HSH_MISS, - HSH_BUSY, - HSH_HIT, - HSH_HITMISS, - HSH_HITPASS, - HSH_GRACE -}; - /* mgt_hash.c */ void HSH_config(const char *); From 84fda34bf380e4484a506f3c4e5fd9b96d31fac9 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 09:42:14 +0200 Subject: [PATCH 04/14] hash: Track the most recent busy objcore for lookups This is a prerequisite before moving waiting list ownership from objhead to objcore. 
--- bin/varnishd/cache/cache_hash.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 4b379988828..feb64b0c599 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -365,9 +365,9 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) struct objhead *oh; struct objcore *oc; struct objcore *exp_oc; + struct objcore *busy_oc; const struct vcf_return *vr; vtim_real exp_t_origin; - int busy_found; const uint8_t *vary; intmax_t boc_progress; unsigned xid = 0; @@ -417,7 +417,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) } assert(oh->refcnt > 0); - busy_found = 0; + busy_oc = NULL; exp_oc = NULL; exp_t_origin = 0.0; VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) { @@ -434,6 +434,8 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC); if (oc->flags & OC_F_BUSY) { + if (busy_oc != NULL) + continue; if (req->hash_ignore_busy) continue; @@ -444,7 +446,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) continue; } - busy_found = 1; + busy_oc = oc; continue; } @@ -547,7 +549,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) else boc_progress = -1; - if (!busy_found) { + if (busy_oc == NULL) { *bocp = hsh_insert_busyobj(wrk, oh); if (exp_oc != NULL) { @@ -564,7 +566,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) return (HSH_MISS); } - AN(busy_found); + CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) { /* we do not wait on the busy object if in grace */ exp_oc->refcnt++; From 6673373243fdb2c6b8ae63fd6086b7ee3e76c349 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Mon, 2 Oct 2023 13:58:40 +0200 Subject: [PATCH 05/14] hash: Refine the objhead lookup result Adding expired variants of miss and hitmiss outcomes will enable better code organization, mainly the extraction of the objhead lookup step into its own function. 
--- bin/varnishd/cache/cache_hash.c | 26 +++++++++++++++----------- bin/varnishd/cache/cache_objhead.h | 2 ++ bin/varnishd/cache/cache_req_fsm.c | 23 +++++++++++++++-------- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index feb64b0c599..65642ea8c0d 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -541,7 +541,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) Lck_Unlock(&oh->mtx); wrk->stats->cache_hitmiss++; VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl); - return (HSH_HITMISS); + return (HSH_HITMISS_EXP); } if (exp_oc != NULL && exp_oc->boc != NULL) @@ -552,18 +552,22 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (busy_oc == NULL) { *bocp = hsh_insert_busyobj(wrk, oh); - if (exp_oc != NULL) { - exp_oc->refcnt++; - *ocp = exp_oc; - if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) { - exp_oc->hits++; - Lck_Unlock(&oh->mtx); - Req_LogHit(wrk, req, exp_oc, boc_progress); - return (HSH_GRACE); - } + if (exp_oc == NULL) { + Lck_Unlock(&oh->mtx); + return (HSH_MISS); } + + exp_oc->refcnt++; + *ocp = exp_oc; + if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) { + exp_oc->hits++; + Lck_Unlock(&oh->mtx); + Req_LogHit(wrk, req, exp_oc, boc_progress); + return (HSH_GRACE); + } + Lck_Unlock(&oh->mtx); - return (HSH_MISS); + return (HSH_MISS_EXP); } CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h index 58b53650294..4effba9d070 100644 --- a/bin/varnishd/cache/cache_objhead.h +++ b/bin/varnishd/cache/cache_objhead.h @@ -58,7 +58,9 @@ struct objhead { enum lookup_e { HSH_MISS, + HSH_MISS_EXP, HSH_HITMISS, + HSH_HITMISS_EXP, HSH_HITPASS, HSH_HIT, HSH_GRACE, diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index bb1f6951dd3..6b2b769722f 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -590,26 +590,33 @@ cnt_lookup(struct worker *wrk, struct req *req) } AZ(req->objcore); - if (lr == HSH_MISS || lr == HSH_HITMISS) { + switch (lr) { + case HSH_HITMISS: + case HSH_HITMISS_EXP: + req->is_hitmiss = 1; + /* FALL_THROUGH */ + case HSH_MISS: + case HSH_MISS_EXP: AN(busy); AN(busy->flags & OC_F_BUSY); req->objcore = busy; req->stale_oc = oc; req->req_step = R_STP_MISS; - if (lr == HSH_HITMISS) - req->is_hitmiss = 1; return (REQ_FSM_MORE); - } - if (lr == HSH_HITPASS) { + case HSH_HITPASS: AZ(busy); AZ(oc); req->req_step = R_STP_PASS; req->is_hitpass = 1; return (REQ_FSM_MORE); + case HSH_HIT: + case HSH_GRACE: + break; + case HSH_BUSY: + default: + WRONG("Invalid lookup result"); } - assert(lr == HSH_HIT || lr == HSH_GRACE); - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); AZ(oc->flags & OC_F_BUSY); req->objcore = oc; @@ -1067,7 +1074,7 @@ cnt_purge(struct worker *wrk, struct req *req) AZ(req->objcore); req->hash_always_miss = 1; lr = HSH_Lookup(req, &oc, &boc); - assert (lr == HSH_MISS); + assert(lr == HSH_MISS); AZ(oc); CHECK_OBJ_NOTNULL(boc, OBJCORE_MAGIC); VRY_Finish(req, DISCARD); From ebfc019ea64c122503ef31f9c094e1915b29f489 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 09:51:14 +0200 Subject: [PATCH 06/14] hash: Extract objhead lookup from HSH_Lookup() This creates a separate objhead lookup after the hash lookup, and a central location to process the objhead lookup result. 
This makes both the lookup code and the lookup result processing code more compact and easier to follow individually. In particular, the objhead lookup now operates entirely under the objhead lock and has a much better signal to noise ratio. --- bin/varnishd/cache/cache_hash.c | 264 +++++++++++++++++++------------- 1 file changed, 158 insertions(+), 106 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 65642ea8c0d..b02af4fcd2e 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -358,63 +358,30 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh) /*--------------------------------------------------------------------- */ -enum lookup_e -HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) +static enum lookup_e +hsh_objhead_lookup(struct objhead *oh, struct req *req, struct objcore **ocp, + struct objcore **bocp) { struct worker *wrk; - struct objhead *oh; struct objcore *oc; struct objcore *exp_oc; struct objcore *busy_oc; const struct vcf_return *vr; vtim_real exp_t_origin; const uint8_t *vary; - intmax_t boc_progress; - unsigned xid = 0; - float dttl = 0.0; + CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); + Lck_AssertHeld(&oh->mtx); + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + wrk = req->wrk; + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); AN(ocp); *ocp = NULL; AN(bocp); *bocp = NULL; - CHECK_OBJ_NOTNULL(req, REQ_MAGIC); - wrk = req->wrk; - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC); - CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC); - CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC); - AN(hash); - - hsh_prealloc(wrk); - if (DO_DEBUG(DBG_HASHEDGE)) - hsh_testmagic(req->digest); - - if (req->hash_objhead != NULL) { - /* - * This req came off the waiting list, and brings an - * oh refcnt with it. - */ - CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC); - oh = req->hash_objhead; - Lck_Lock(&oh->mtx); - req->hash_objhead = NULL; - } else { - AN(wrk->wpriv->nobjhead); - oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead); - } - - CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); - Lck_AssertHeld(&oh->mtx); - - if (req->hash_always_miss) { - /* XXX: should we do predictive Vary in this case ? 
*/ - /* Insert new objcore in objecthead and release mutex */ - *bocp = hsh_insert_busyobj(wrk, oh); - /* NB: no deref of objhead, new object inherits reference */ - Lck_Unlock(&oh->mtx); + if (req->hash_always_miss) return (HSH_MISS); - } assert(oh->refcnt > 0); busy_oc = NULL; @@ -439,7 +406,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (req->hash_ignore_busy) continue; - if (oc->boc && oc->boc->vary != NULL && + if (oc->boc != NULL && oc->boc->vary != NULL && !req->hash_ignore_vary && !VRY_Match(req, oc->boc->vary)) { wrk->strangelove++; @@ -501,30 +468,15 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) (void)req->vcf->func(req, &oc, &exp_oc, 1); if (oc != NULL && oc->flags & OC_F_HFP) { - xid = VXID(ObjGetXID(wrk, oc)); - dttl = EXP_Dttl(req, oc); - AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); - wrk->stats->cache_hitpass++; - VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl); + *ocp = oc; return (HSH_HITPASS); } if (oc != NULL) { *ocp = oc; oc->refcnt++; - if (oc->flags & OC_F_HFM) { - xid = VXID(ObjGetXID(wrk, oc)); - dttl = EXP_Dttl(req, oc); - *bocp = hsh_insert_busyobj(wrk, oh); - Lck_Unlock(&oh->mtx); - wrk->stats->cache_hitmiss++; - VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl); + if (oc->flags & OC_F_HFM) return (HSH_HITMISS); - } - oc->hits++; - boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far; - AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); - Req_LogHit(wrk, req, oc, boc_progress); return (HSH_HIT); } @@ -533,75 +485,175 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) * expired HFM ("grace/keep HFM") * * XXX should HFM objects actually have grace/keep ? - * XXX also: why isn't *ocp = exp_oc ? */ - xid = VXID(ObjGetXID(wrk, exp_oc)); - dttl = EXP_Dttl(req, exp_oc); - *bocp = hsh_insert_busyobj(wrk, oh); - Lck_Unlock(&oh->mtx); - wrk->stats->cache_hitmiss++; - VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl); + *ocp = exp_oc; return (HSH_HITMISS_EXP); } - if (exp_oc != NULL && exp_oc->boc != NULL) - boc_progress = exp_oc->boc->fetched_so_far; - else - boc_progress = -1; - if (busy_oc == NULL) { - *bocp = hsh_insert_busyobj(wrk, oh); - - if (exp_oc == NULL) { - Lck_Unlock(&oh->mtx); + if (exp_oc == NULL) return (HSH_MISS); - } - - exp_oc->refcnt++; *ocp = exp_oc; - if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) { - exp_oc->hits++; - Lck_Unlock(&oh->mtx); - Req_LogHit(wrk, req, exp_oc, boc_progress); + exp_oc->refcnt++; + if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) return (HSH_GRACE); - } - - Lck_Unlock(&oh->mtx); return (HSH_MISS_EXP); } CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); + *bocp = busy_oc; + if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) { /* we do not wait on the busy object if in grace */ - exp_oc->refcnt++; *ocp = exp_oc; - exp_oc->hits++; - AN(hsh_deref_objhead_unlock(wrk, &oh, 0)); - Req_LogHit(wrk, req, exp_oc, boc_progress); + exp_oc->refcnt++; return (HSH_GRACE); } - /* There are one or more busy objects, wait for them */ - VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list); + return (HSH_BUSY); +} - AZ(req->hash_ignore_busy); +/*--------------------------------------------------------------------- + */ - /* - * The objhead reference transfers to the sess, we get it - * back when the sess comes off the waiting list and - * calls us again - */ - req->hash_objhead = oh; - req->wrk = NULL; - req->waitinglist = 1; +enum lookup_e +HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) +{ + enum lookup_e 
lr; + struct worker *wrk; + struct objhead *oh; + struct objcore *oc; + struct objcore *busy_oc; + intmax_t boc_progress; + unsigned xid = 0; + float dttl = 0.0; - if (DO_DEBUG(DBG_WAITINGLIST)) - VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh); + AN(ocp); + *ocp = NULL; + AN(bocp); + *bocp = NULL; - Lck_Unlock(&oh->mtx); + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + wrk = req->wrk; + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC); + CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC); + CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC); + AN(hash); - wrk->stats->busy_sleep++; - return (HSH_BUSY); + hsh_prealloc(wrk); + if (DO_DEBUG(DBG_HASHEDGE)) + hsh_testmagic(req->digest); + + if (req->hash_objhead != NULL) { + /* + * This req came off the waiting list, and brings an + * oh refcnt with it. + */ + CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC); + oh = req->hash_objhead; + Lck_Lock(&oh->mtx); + req->hash_objhead = NULL; + } else { + AN(wrk->wpriv->nobjhead); + oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead); + } + + lr = hsh_objhead_lookup(oh, req, &oc, &busy_oc); + switch (lr) { + case HSH_MISS: + case HSH_MISS_EXP: + if (lr == HSH_MISS) + AZ(oc); + else + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AZ(busy_oc); + *bocp = hsh_insert_busyobj(wrk, oh); + Lck_Unlock(&oh->mtx); + break; + case HSH_HITMISS: + case HSH_HITMISS_EXP: + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AZ(busy_oc); + xid = VXID(ObjGetXID(wrk, oc)); + dttl = EXP_Dttl(req, oc); + *bocp = hsh_insert_busyobj(wrk, oh); + Lck_Unlock(&oh->mtx); + wrk->stats->cache_hitmiss++; + VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl); + if (lr == HSH_HITMISS_EXP) + oc = NULL; + break; + case HSH_HITPASS: + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AZ(busy_oc); + xid = VXID(ObjGetXID(wrk, oc)); + dttl = EXP_Dttl(req, oc); + AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); + wrk->stats->cache_hitpass++; + VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl); + oc = NULL; + break; + case HSH_HIT: + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AZ(busy_oc); + oc->hits++; + boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far; + AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); + Req_LogHit(wrk, req, oc, boc_progress); + break; + case HSH_GRACE: + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + CHECK_OBJ_ORNULL(busy_oc, OBJCORE_MAGIC); + oc->hits++; + boc_progress = oc->boc == NULL ? 
-1 : oc->boc->fetched_so_far; + if (busy_oc == NULL) { + *bocp = hsh_insert_busyobj(wrk, oh); + Lck_Unlock(&oh->mtx); + } else { + /* we do not wait on the busy object if in grace */ + AN(hsh_deref_objhead_unlock(wrk, &oh, 0)); + busy_oc = NULL; + } + Req_LogHit(wrk, req, oc, boc_progress); + break; + case HSH_BUSY: + AZ(oc); + CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); + AZ(req->hash_ignore_busy); + + /* There are one or more busy objects, wait for them */ + VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list); + + /* + * The objhead reference transfers to the sess, we get it + * back when the sess comes off the waiting list and + * calls us again + */ + req->hash_objhead = oh; + req->wrk = NULL; + req->waitinglist = 1; + + if (DO_DEBUG(DBG_WAITINGLIST)) + VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh); + + Lck_Unlock(&oh->mtx); + + wrk->stats->busy_sleep++; + break; + default: + WRONG("Invalid lookup result"); + } + + if (oc != NULL) + *ocp = oc; + + if (busy_oc != NULL) { + assert(busy_oc->refcnt > 0); + busy_oc->refcnt++; + } + + return (lr); } /*--------------------------------------------------------------------- From 5ce6a481a8f6c1fb3fa475937ff5b3863eb45ef4 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Thu, 28 Sep 2023 08:21:16 +0200 Subject: [PATCH 07/14] hash: Transfer waiting list ownership to the objcore The advantages of the objhead owning the waiting list are that we need fewer waiting list heads as they are shared across multiple objcores, we get more frequent opportunities to rush waiting requests, and we avoid one expensive part of the lookup, which is to find an objhead from a request hash. The downsides are the requirement to always perform a new lookup, which increases contention on the objhead lock, and prevents expired but valid objects from being considered fresh. This is another way to describe the cache-control no-cache directive: a response considered fresh at fetch time, but immediately stale afterwards. Having the waiting list on the objcore means that we reenter the cache lookup with the object candidate from the previous lookup, so we may short-circuit the more expensive objhead lookup when the objcore is compatible with the request rushing off the waiting list. The expensive request hash lookup is still avoided; just like before, the objcore has a reference to the objhead. This creates a new first step in the lookup process: - rush match (when coming from the waiting list) - hash lookup (unless coming from the waiting list) - objhead lookup (unless there was a rush hit) This moves the infamous waiting list serialization phenomenon to the vary header match. Knowing that a rushed object is guaranteed to lead to a cache hit allows the rush policy to be applied wholesale, instead of exponentially. Only requests incompatible with the objcore vary header may reenter the waiting list, a scenario no different from the spurious rush wakeups when operating on the objhead. The waiting list operations are still guarded by the objhead lock. As it stands currently, the exponential rush is indirectly systematic, since the unusable objcore is rushed before the objhead lookup. This is the first step towards implementing no-cache and private support at the header field granularity.
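As an illustration of the rush match, the decision boils down to the following self-contained sketch. This is a deliberately simplified model, not Varnish code: the real implementation is hsh_rush_match() in the diff below, which checks the objcore flags and runs VRY_Match() against the stored OA_VARY attribute; the toy_objcore type and the string-valued vary key here are assumptions made only for the example.

    #include <stdio.h>
    #include <string.h>

    /* Toy model: a request waking up from an objcore's waiting list may
     * reuse that objcore directly only when the objcore carries no
     * disqualifying flags and its vary key matches the request.
     */
    struct toy_objcore {
            unsigned flags;         /* 0 means a plain cacheable object */
            const char *vary;       /* NULL means no Vary */
    };

    static int
    toy_rush_match(const struct toy_objcore *oc, const char *req_vary)
    {
            if (oc == NULL || oc->flags != 0)
                    return (0);     /* fall through to the objhead lookup */
            if (oc->vary == NULL)
                    return (1);     /* always serviceable */
            return (strcmp(oc->vary, req_vary) == 0);
    }

    int
    main(void)
    {
            struct toy_objcore plain = { 0, "gzip" };
            struct toy_objcore flagged = { 1 /* e.g. hit-for-miss */, "gzip" };

            printf("same vary:  %d\n", toy_rush_match(&plain, "gzip"));   /* 1: hit */
            printf("other vary: %d\n", toy_rush_match(&plain, "br"));     /* 0: lookup */
            printf("flagged oc: %d\n", toy_rush_match(&flagged, "gzip")); /* 0: lookup */
            return (0);
    }

In the first case the whole waiting list can be rushed at once, since every waiter with the same vary key is guaranteed a hit; the other two cases fall back to the regular objhead lookup.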
--- bin/varnishd/cache/cache.h | 5 +- bin/varnishd/cache/cache_fetch.c | 2 +- bin/varnishd/cache/cache_hash.c | 141 ++++++++++++++++---------- bin/varnishd/cache/cache_obj.c | 2 + bin/varnishd/cache/cache_objhead.h | 1 - bin/varnishd/cache/cache_req_fsm.c | 8 +- bin/varnishd/cache/cache_vary.c | 2 +- bin/varnishtest/tests/c00125.vtc | 156 +++++++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 60 deletions(-) create mode 100644 bin/varnishtest/tests/c00125.vtc diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h index b6af255376d..485340083c4 100644 --- a/bin/varnishd/cache/cache.h +++ b/bin/varnishd/cache/cache.h @@ -359,6 +359,7 @@ struct objcore { VTAILQ_ENTRY(objcore) lru_list; VTAILQ_ENTRY(objcore) ban_list; VSTAILQ_ENTRY(objcore) exp_list; + VTAILQ_HEAD(, req) waitinglist; struct ban *ban; }; @@ -490,8 +491,8 @@ struct req { struct objcore *body_oc; - /* The busy objhead we sleep on */ - struct objhead *hash_objhead; + /* The busy objcore we sleep on */ + struct objcore *hash_oc; /* Built Vary string == workspace reservation */ uint8_t *vary_b; diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c index 55c14f1d446..995568a1820 100644 --- a/bin/varnishd/cache/cache_fetch.c +++ b/bin/varnishd/cache/cache_fetch.c @@ -922,7 +922,7 @@ vbf_stp_error(struct worker *wrk, struct busyobj *bo) stale = bo->stale_oc; oc->t_origin = now; - if (!VTAILQ_EMPTY(&oc->objhead->waitinglist)) { + if (!VTAILQ_EMPTY(&oc->waitinglist)) { /* * If there is a waitinglist, it means that there is no * grace-able object, so cache the error return for a diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index b02af4fcd2e..951a7860d56 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -75,12 +75,11 @@ struct rush { static const struct hash_slinger *hash; static struct objhead *private_oh; -static void hsh_rush1(const struct worker *, struct objhead *, +static void hsh_rush1(const struct worker *, struct objcore *, struct rush *, int); static void hsh_rush2(struct worker *, struct rush *); static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh); -static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, - int); +static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh); /*---------------------------------------------------------------------*/ @@ -102,7 +101,6 @@ hsh_newobjhead(void) XXXAN(oh); oh->refcnt = 1; VTAILQ_INIT(&oh->objcs); - VTAILQ_INIT(&oh->waitinglist); Lck_New(&oh->mtx, lck_objhdr); return (oh); } @@ -182,7 +180,6 @@ HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh) AZ(oh->refcnt); assert(VTAILQ_EMPTY(&oh->objcs)); - assert(VTAILQ_EMPTY(&oh->waitinglist)); Lck_Delete(&oh->mtx); wrk->stats->n_objecthead--; FREE_OBJ(oh); @@ -323,8 +320,8 @@ HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc, VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list); oc->flags &= ~OC_F_BUSY; - if (!VTAILQ_EMPTY(&oh->waitinglist)) - hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY); + if (!VTAILQ_EMPTY(&oc->waitinglist)) + hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); Lck_Unlock(&oh->mtx); hsh_rush2(wrk, &rush); @@ -355,6 +352,40 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh) return (oc); } +/*--------------------------------------------------------------------- + */ + +static unsigned +hsh_rush_match(struct req *req) +{ + struct objcore *oc; + const uint8_t *vary; + + oc = 
req->hash_oc; + CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC); + + if (oc == NULL) + return (0); + + AZ(oc->flags & OC_F_BUSY); + if (oc->flags != 0) + return (0); + + /* The objcore was serviceable, all the requests on the waiting list + * were rushed at once. + */ + assert(VTAILQ_EMPTY(&oc->waitinglist)); + + if (req->hash_ignore_vary) + return (1); + if (!ObjHasAttr(req->wrk, oc, OA_VARY)) + return (1); + + vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL); + AN(vary); + return (VRY_Match(req, vary)); +} + /*--------------------------------------------------------------------- */ @@ -545,15 +576,30 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (DO_DEBUG(DBG_HASHEDGE)) hsh_testmagic(req->digest); - if (req->hash_objhead != NULL) { + if (hsh_rush_match(req)) { + TAKE_OBJ_NOTNULL(oc, &req->hash_oc, OBJCORE_MAGIC); + *ocp = oc; + oc->hits++; + boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far; + Req_LogHit(wrk, req, oc, boc_progress); + /* NB: since this hit comes from the waiting list instead of + * a regular lookup, grace is not considered. The object is + * fresh in the context of the waiting list, even expired: it + * was successfully just [re]validated by a fetch task. + */ + return (HSH_HIT); + } + + if (req->hash_oc != NULL) { /* * This req came off the waiting list, and brings an - * oh refcnt with it. + * incompatible hash_oc refcnt with it. */ - CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC); - oh = req->hash_objhead; + CHECK_OBJ_NOTNULL(req->hash_oc->objhead, OBJHEAD_MAGIC); + oh = req->hash_oc->objhead; + /* XXX: can we avoid grabbing the oh lock twice here? */ + (void)HSH_DerefObjCore(wrk, &req->hash_oc, HSH_RUSH_POLICY); Lck_Lock(&oh->mtx); - req->hash_objhead = NULL; } else { AN(wrk->wpriv->nobjhead); oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead); @@ -589,7 +635,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) AZ(busy_oc); xid = VXID(ObjGetXID(wrk, oc)); dttl = EXP_Dttl(req, oc); - AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); + AN(hsh_deref_objhead_unlock(wrk, &oh)); wrk->stats->cache_hitpass++; VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl); oc = NULL; @@ -599,7 +645,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) AZ(busy_oc); oc->hits++; boc_progress = oc->boc == NULL ? 
-1 : oc->boc->fetched_so_far; - AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY)); + AN(hsh_deref_objhead_unlock(wrk, &oh)); Req_LogHit(wrk, req, oc, boc_progress); break; case HSH_GRACE: @@ -612,7 +658,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) Lck_Unlock(&oh->mtx); } else { /* we do not wait on the busy object if in grace */ - AN(hsh_deref_objhead_unlock(wrk, &oh, 0)); + AN(hsh_deref_objhead_unlock(wrk, &oh)); busy_oc = NULL; } Req_LogHit(wrk, req, oc, boc_progress); @@ -623,19 +669,21 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) AZ(req->hash_ignore_busy); /* There are one or more busy objects, wait for them */ - VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list); + VTAILQ_INSERT_TAIL(&busy_oc->waitinglist, req, w_list); /* - * The objhead reference transfers to the sess, we get it - * back when the sess comes off the waiting list and - * calls us again + * The objcore reference transfers to the req, we get it + * back when the req comes off the waiting list and calls + * us again */ - req->hash_objhead = oh; + AZ(req->hash_oc); + req->hash_oc = busy_oc; req->wrk = NULL; req->waitinglist = 1; if (DO_DEBUG(DBG_WAITINGLIST)) - VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh); + VSLb(req->vsl, SLT_Debug, + "on waiting list <%p>", busy_oc); Lck_Unlock(&oh->mtx); @@ -661,30 +709,35 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) */ static void -hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max) +hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) { int i; struct req *req; if (max == 0) return; - if (max == HSH_RUSH_POLICY) - max = cache_param->rush_exponent; + if (max == HSH_RUSH_POLICY) { + AZ(oc->flags & OC_F_BUSY); + if (oc->flags != 0) + max = cache_param->rush_exponent; + else + max = INT_MAX; + } assert(max > 0); CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); CHECK_OBJ_NOTNULL(r, RUSH_MAGIC); VTAILQ_INIT(&r->reqs); - Lck_AssertHeld(&oh->mtx); + Lck_AssertHeld(&oc->objhead->mtx); for (i = 0; i < max; i++) { - req = VTAILQ_FIRST(&oh->waitinglist); + req = VTAILQ_FIRST(&oc->waitinglist); if (req == NULL) break; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); wrk->stats->busy_wakeup++; AZ(req->wrk); - VTAILQ_REMOVE(&oh->waitinglist, req, w_list); + VTAILQ_REMOVE(&oc->waitinglist, req, w_list); VTAILQ_INSERT_TAIL(&r->reqs, req, w_list); req->waitinglist = 0; } @@ -945,9 +998,9 @@ HSH_Unbusy(struct worker *wrk, struct objcore *oc) VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list); oc->flags &= ~OC_F_BUSY; - if (!VTAILQ_EMPTY(&oh->waitinglist)) { - assert(oh->refcnt > 1); - hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY); + if (!VTAILQ_EMPTY(&oc->waitinglist)) { + assert(oc->refcnt > 1); + hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); } Lck_Unlock(&oh->mtx); EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore was @@ -1099,9 +1152,9 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax) r = --oc->refcnt; if (!r) VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); - if (!VTAILQ_EMPTY(&oh->waitinglist)) { - assert(oh->refcnt > 1); - hsh_rush1(wrk, oh, &rush, rushmax); + if (!VTAILQ_EMPTY(&oc->waitinglist)) { + assert(oc->refcnt > 0); + hsh_rush1(wrk, oc, &rush, rushmax); } Lck_Unlock(&oh->mtx); hsh_rush2(wrk, &rush); @@ -1124,11 +1177,9 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax) } static int 
-hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max) +hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh) { struct objhead *oh; - struct rush rush; - int r; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC); @@ -1136,26 +1187,14 @@ hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max) Lck_AssertHeld(&oh->mtx); if (oh == private_oh) { - assert(VTAILQ_EMPTY(&oh->waitinglist)); assert(oh->refcnt > 1); oh->refcnt--; Lck_Unlock(&oh->mtx); return (1); } - INIT_OBJ(&rush, RUSH_MAGIC); - if (!VTAILQ_EMPTY(&oh->waitinglist)) { - assert(oh->refcnt > 1); - hsh_rush1(wrk, oh, &rush, max); - } - - if (oh->refcnt == 1) - assert(VTAILQ_EMPTY(&oh->waitinglist)); - assert(oh->refcnt > 0); - r = hash->deref(wrk, oh); /* Unlocks oh->mtx */ - hsh_rush2(wrk, &rush); - return (r); + return (hash->deref(wrk, oh)); /* Unlocks oh->mtx */ } static int @@ -1167,7 +1206,7 @@ hsh_deref_objhead(struct worker *wrk, struct objhead **poh) TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC); Lck_Lock(&oh->mtx); - return (hsh_deref_objhead_unlock(wrk, &oh, 0)); + return (hsh_deref_objhead_unlock(wrk, &oh)); } void diff --git a/bin/varnishd/cache/cache_obj.c b/bin/varnishd/cache/cache_obj.c index d92fc1f0d37..43d854904da 100644 --- a/bin/varnishd/cache/cache_obj.c +++ b/bin/varnishd/cache/cache_obj.c @@ -142,6 +142,7 @@ ObjNew(const struct worker *wrk) wrk->stats->n_objectcore++; oc->last_lru = NAN; oc->flags = OC_F_BUSY; + VTAILQ_INIT(&oc->waitinglist); oc->boc = obj_newboc(); @@ -160,6 +161,7 @@ ObjDestroy(const struct worker *wrk, struct objcore **p) CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); TAKE_OBJ_NOTNULL(oc, p, OBJCORE_MAGIC); + assert(VTAILQ_EMPTY(&oc->waitinglist)); if (oc->boc != NULL) obj_deleteboc(&oc->boc); FREE_OBJ(oc); diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h index 4effba9d070..c4f0366bf12 100644 --- a/bin/varnishd/cache/cache_objhead.h +++ b/bin/varnishd/cache/cache_objhead.h @@ -40,7 +40,6 @@ struct objhead { struct lock mtx; VTAILQ_HEAD(,objcore) objcs; uint8_t digest[DIGEST_LEN]; - VTAILQ_HEAD(, req) waitinglist; /*---------------------------------------------------- * The fields below are for the sole private use of diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index 6b2b769722f..c4b986460dd 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -545,7 +545,7 @@ cnt_lookup(struct worker *wrk, struct req *req) { struct objcore *oc, *busy; enum lookup_e lr; - int had_objhead = 0; + int had_objcore = 0; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(req, REQ_MAGIC); @@ -557,8 +557,8 @@ cnt_lookup(struct worker *wrk, struct req *req) VRY_Prep(req); AZ(req->objcore); - if (req->hash_objhead) - had_objhead = 1; + if (req->hash_oc) + had_objcore = 1; wrk->strangelove = 0; lr = HSH_Lookup(req, &oc, &busy); if (lr == HSH_BUSY) { @@ -574,7 +574,7 @@ cnt_lookup(struct worker *wrk, struct req *req) if ((unsigned)wrk->strangelove >= cache_param->vary_notice) VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)", wrk->strangelove); - if (had_objhead) + if (had_objcore) VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk)); if (req->vcf != NULL) { diff --git a/bin/varnishd/cache/cache_vary.c b/bin/varnishd/cache/cache_vary.c index 101eca231b0..06956441fd3 100644 --- a/bin/varnishd/cache/cache_vary.c +++ b/bin/varnishd/cache/cache_vary.c @@ -224,7 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2) void 
VRY_Prep(struct req *req) { - if (req->hash_objhead == NULL) { + if (req->hash_oc == NULL) { /* Not a waiting list return */ AZ(req->vary_b); AZ(req->vary_e); diff --git a/bin/varnishtest/tests/c00125.vtc b/bin/varnishtest/tests/c00125.vtc new file mode 100644 index 00000000000..8fbca4f46b2 --- /dev/null +++ b/bin/varnishtest/tests/c00125.vtc @@ -0,0 +1,156 @@ +varnishtest "successful expired waiting list hit" + +barrier b1 cond 2 +barrier b2 cond 2 +barrier b3 cond 2 +barrier b4 cond 2 + + +server s1 { + rxreq + expect req.http.user-agent == c1 + expect req.http.bgfetch == false + barrier b1 sync + barrier b2 sync + txresp -hdr "Cache-Control: max-age=60" -hdr "Age: 120" + + rxreq + expect req.http.user-agent == c3 + expect req.http.bgfetch == true + txresp + + # The no-cache case only works with a complicit VCL, for now. + rxreq + expect req.http.user-agent == c4 + expect req.http.bgfetch == false + barrier b3 sync + barrier b4 sync + txresp -hdr "Cache-Control: no-cache" + + rxreq + expect req.http.user-agent == c6 + expect req.http.bgfetch == false + txresp -hdr "Cache-Control: no-cache" +} -start + +varnish v1 -cliok "param.set default_grace 1h" +varnish v1 -cliok "param.set thread_pools 1" +varnish v1 -cliok "param.set debug +syncvsl,+waitinglist" +varnish v1 -vcl+backend { + sub vcl_backend_fetch { + set bereq.http.bgfetch = bereq.is_bgfetch; + } + sub vcl_beresp_stale { + # We just validated a stale object, do not mark it as + # uncacheable. The object remains available for grace + # hits and background fetches. + return; + } + sub vcl_beresp_control { + if (beresp.http.cache-control == "no-cache") { + # Keep beresp.uncacheable clear. + return; + } + } + sub vcl_deliver { + set resp.http.obj-hits = obj.hits; + set resp.http.obj-ttl = obj.ttl; + } +} -start + +client c1 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1001 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl < 0 +} -start + +barrier b1 sync + +client c2 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1004 1002" + expect resp.http.obj-hits == 1 + expect resp.http.obj-ttl < 0 +} -start + +varnish v1 -expect busy_sleep == 1 +barrier b2 sync + +client c1 -wait +client c2 -wait + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 1 +varnish v1 -expect cache_hit_grace == 0 +varnish v1 -expect s_bgfetch == 0 + +client c3 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1006 1002" + expect resp.http.obj-hits == 2 + expect resp.http.obj-ttl < 0 +} -run + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 2 +varnish v1 -expect cache_hit_grace == 1 +varnish v1 -expect s_bgfetch == 1 + +# The only way for a plain no-cache to be hit is to have a non-zero keep. 
+varnish v1 -cliok "param.set default_ttl 0" +varnish v1 -cliok "param.set default_grace 0" +varnish v1 -cliok "param.set default_keep 1h" + +client c4 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1009 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl <= 0 +} -start + +barrier b3 sync + +client c5 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1012 1010" + expect resp.http.obj-hits == 1 + expect resp.http.obj-ttl <= 0 +} -start + +varnish v1 -expect busy_sleep == 2 +barrier b4 sync + +client c4 -wait +client c5 -wait + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 2 +varnish v1 -expect cache_hit == 3 +varnish v1 -expect cache_hit_grace == 1 +varnish v1 -expect s_bgfetch == 1 + +# No hit when not on the waiting list +client c6 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1014 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl <= 0 +} -run From 1ba1a209aae37e2468aa037d62c7383cdd09ee10 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 10:12:14 +0200 Subject: [PATCH 08/14] hash: No rush for synthetic insertions Now that objcores own their waiting lists, they can no longer have anything waiting for them at creation time. --- bin/varnishd/cache/cache_hash.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 951a7860d56..ae8bef61cca 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -284,17 +284,16 @@ HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc, struct ban *ban) { struct objhead *oh; - struct rush rush; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC); AN(digest); CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); AN(ban); + assert(VTAILQ_EMPTY(&oc->waitinglist)); AN(oc->flags & OC_F_BUSY); AZ(oc->flags & OC_F_PRIVATE); assert(oc->refcnt == 1); - INIT_OBJ(&rush, RUSH_MAGIC); hsh_prealloc(wrk); @@ -320,10 +319,7 @@ HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc, VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list); oc->flags &= ~OC_F_BUSY; - if (!VTAILQ_EMPTY(&oc->waitinglist)) - hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); Lck_Unlock(&oh->mtx); - hsh_rush2(wrk, &rush); EXP_Insert(wrk, oc); } From 92b94745692eb02d76d16c608bfe56e6f77ef3c1 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 10:15:19 +0200 Subject: [PATCH 09/14] hash: Unconditionally rush the waiting list This has the same effect, with slightly less ceremony. 
--- bin/varnishd/cache/cache_hash.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index ae8bef61cca..1f0135f58fb 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -730,6 +730,7 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) req = VTAILQ_FIRST(&oc->waitinglist); if (req == NULL) break; + assert(oc->refcnt > 1); CHECK_OBJ_NOTNULL(req, REQ_MAGIC); wrk->stats->busy_wakeup++; AZ(req->wrk); @@ -994,10 +995,7 @@ HSH_Unbusy(struct worker *wrk, struct objcore *oc) VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list); oc->flags &= ~OC_F_BUSY; - if (!VTAILQ_EMPTY(&oc->waitinglist)) { - assert(oc->refcnt > 1); - hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); - } + hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); Lck_Unlock(&oh->mtx); EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore was * called */ From 0f77214e080febeab1a17e3534c750316675b6bc Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 11:04:21 +0200 Subject: [PATCH 10/14] hash: Delay waiting list reference accounting The objcore is officially referenced by a request when it leaves its waiting list during a rush. This allows migrating a waiting list from one objcore to the next without the need to touch reference counters. The hash_oc field will still have to be updated. This field must be kept to navigate from a request to its waiting list to implement walking away from a waiting list. Refs #3835 --- bin/varnishd/cache/cache_hash.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 1f0135f58fb..519f8cde305 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -670,8 +670,10 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) /* * The objcore reference transfers to the req, we get it * back when the req comes off the waiting list and calls - * us again + * us again. The reference counter increase is deferred to + * the rush hour. */ + assert(busy_oc->refcnt > 0); AZ(req->hash_oc); req->hash_oc = busy_oc; req->wrk = NULL; @@ -692,11 +694,6 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (oc != NULL) *ocp = oc; - if (busy_oc != NULL) { - assert(busy_oc->refcnt > 0); - busy_oc->refcnt++; - } - return (lr); } @@ -730,7 +727,8 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) req = VTAILQ_FIRST(&oc->waitinglist); if (req == NULL) break; - assert(oc->refcnt > 1); + assert(oc->refcnt > 0); + oc->refcnt++; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); wrk->stats->busy_wakeup++; AZ(req->wrk); From 57ab2f8b71c3bf478ecb3ee94588897567190b91 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 15:37:46 +0200 Subject: [PATCH 11/14] hash: Log the VXID of requests being rushed This is very useful to manually verify the rush_exponent behavior.
--- bin/varnishd/cache/cache_hash.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 519f8cde305..ca8c941ba29 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -705,6 +705,7 @@ static void hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) { int i; + unsigned xid = 0; struct req *req; if (max == 0) @@ -727,6 +728,13 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) req = VTAILQ_FIRST(&oc->waitinglist); if (req == NULL) break; + + if (DO_DEBUG(DBG_WAITINGLIST)) { + xid = VXID(req->vsl->wid); + VSLb(wrk->vsl, SLT_Debug, + "waiting list rush for req %u", xid); + } + assert(oc->refcnt > 0); oc->refcnt++; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); From 0e005def3e8279e2b0bf8d9e21f3b7992cc6cb95 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 29 Sep 2023 15:40:44 +0200 Subject: [PATCH 12/14] hash: Make the exponential rush exponential again After the migration of the waiting list to the objcore, the result was that each reembarking request would rush more requests by dropping the waited objcore reference before the objhead lookup. The reference is now dropped after the objhead lookup, allowing for the lookup outcome to be checked, either triggering a complete rush when the object is serviceable (modulo vary match) or moving the waiting list to the next busy objcore. This avoids the spurious wakeups caused by objhead rushes when waiting lists were owned by objheads. This is however a missed opportunity when there are multiple concurrent busy objcores. This could be alleviated by linking busy objcores together but at this point this is already a net improvement. The only way to get multiple concurrent busy objcores on the same objhead would be through the hash_ignore_busy flag anyway. --- bin/varnishd/cache/cache_hash.c | 84 +++++++++++++++++++++++++++++--- bin/varnishtest/tests/c00099.vtc | 54 +++++++++++++++++++- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index ca8c941ba29..5553d9f2bca 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -382,6 +382,42 @@ hsh_rush_match(struct req *req) return (VRY_Match(req, vary)); } +static void +hsh_rush_move(struct objcore *new_oc, struct objcore *old_oc) +{ + struct req *req; + + CHECK_OBJ_NOTNULL(new_oc, OBJCORE_MAGIC); + CHECK_OBJ_ORNULL(old_oc, OBJCORE_MAGIC); + + if (old_oc == NULL || VTAILQ_EMPTY(&old_oc->waitinglist)) + return; + + assert(old_oc->objhead == new_oc->objhead); + assert(old_oc->refcnt > 0); + assert(new_oc->refcnt > 0); + Lck_AssertHeld(&old_oc->objhead->mtx); + + /* NB: req holds a weak reference of its hash_oc, so no reference + * counting is needed when moving to the new_oc. An actual old_oc + * reference should be held by either the fetch task rushing its + * waiting list at unbusy time, or a rushed request exponentially + * rushing other requests from the waiting list. + */ + VTAILQ_FOREACH(req, &old_oc->waitinglist, w_list) { + CHECK_OBJ(req, REQ_MAGIC); + assert(req->hash_oc == old_oc); + req->hash_oc = new_oc; + } + + /* NB: The double concatenation of lists allows requests that were + * waiting for the old_oc to show up first in the waiting list of the + * new_oc.
+ */ + VTAILQ_CONCAT(&old_oc->waitinglist, &new_oc->waitinglist, w_list); + VTAILQ_CONCAT(&new_oc->waitinglist, &old_oc->waitinglist, w_list); +} + /*--------------------------------------------------------------------- */ @@ -551,6 +587,8 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) struct objhead *oh; struct objcore *oc; struct objcore *busy_oc; + struct objcore *rush_oc; + struct rush rush; intmax_t boc_progress; unsigned xid = 0; float dttl = 0.0; @@ -591,17 +629,42 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) * This req came off the waiting list, and brings an * incompatible hash_oc refcnt with it. */ - CHECK_OBJ_NOTNULL(req->hash_oc->objhead, OBJHEAD_MAGIC); - oh = req->hash_oc->objhead; - /* XXX: can we avoid grabbing the oh lock twice here? */ - (void)HSH_DerefObjCore(wrk, &req->hash_oc, HSH_RUSH_POLICY); + TAKE_OBJ_NOTNULL(rush_oc, &req->hash_oc, OBJCORE_MAGIC); + oh = rush_oc->objhead; Lck_Lock(&oh->mtx); } else { AN(wrk->wpriv->nobjhead); oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead); + rush_oc = NULL; } lr = hsh_objhead_lookup(oh, req, &oc, &busy_oc); + + INIT_OBJ(&rush, RUSH_MAGIC); + + if (rush_oc != NULL && !VTAILQ_EMPTY(&rush_oc->waitinglist)) { + switch (lr) { + case HSH_HIT: + case HSH_GRACE: + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + AZ(oc->flags); + hsh_rush_move(oc, rush_oc); + hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); + assert(VTAILQ_EMPTY(&oc->waitinglist)); + break; + case HSH_BUSY: + CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); + hsh_rush_move(busy_oc, rush_oc); + break; + default: + /* The remaining stragglers will be passed on to the + * next busy object or woken up as per rush_exponent + * when the rush_oc reference is dropped. + */ + break; + } + } + switch (lr) { case HSH_MISS: case HSH_MISS_EXP: @@ -611,6 +674,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); AZ(busy_oc); *bocp = hsh_insert_busyobj(wrk, oh); + hsh_rush_move(*bocp, rush_oc); Lck_Unlock(&oh->mtx); break; case HSH_HITMISS: @@ -620,6 +684,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) xid = VXID(ObjGetXID(wrk, oc)); dttl = EXP_Dttl(req, oc); *bocp = hsh_insert_busyobj(wrk, oh); + hsh_rush_move(*bocp, rush_oc); Lck_Unlock(&oh->mtx); wrk->stats->cache_hitmiss++; VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl); @@ -664,8 +729,10 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC); AZ(req->hash_ignore_busy); - /* There are one or more busy objects, wait for them */ - VTAILQ_INSERT_TAIL(&busy_oc->waitinglist, req, w_list); + if (rush_oc != NULL) + VTAILQ_INSERT_HEAD(&busy_oc->waitinglist, req, w_list); + else + VTAILQ_INSERT_TAIL(&busy_oc->waitinglist, req, w_list); /* * The objcore reference transfers to the req, we get it @@ -694,6 +761,11 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (oc != NULL) *ocp = oc; + if (rush_oc != NULL) { + hsh_rush2(wrk, &rush); + (void)HSH_DerefObjCore(wrk, &rush_oc, HSH_RUSH_POLICY); + } + return (lr); } diff --git a/bin/varnishtest/tests/c00099.vtc b/bin/varnishtest/tests/c00099.vtc index 9ee17759756..772279f923c 100644 --- a/bin/varnishtest/tests/c00099.vtc +++ b/bin/varnishtest/tests/c00099.vtc @@ -68,7 +68,10 @@ server s6 { chunkedlen 0 } -start -varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_exponent=2" -arg "-p debug=+syncvsl" -arg "-p debug=+waitinglist" 
-vcl+backend { +varnish v1 -cliok "param.set thread_pools 1" +varnish v1 -cliok "param.set rush_exponent 2" +varnish v1 -cliok "param.set debug +syncvsl,+waitinglist" +varnish v1 -vcl+backend { sub vcl_backend_fetch { if (bereq.http.user-agent == "c1") { set bereq.backend = s1; @@ -97,32 +100,63 @@ client c1 { # This makes sure that c1->s1 is done first barrier b1 sync +# This will ensure that c{2..6} enter c1's waiting list in order. +logexpect l2 -v v1 -g raw { + expect * * ReqHeader "User-Agent: c2" + expect * = Debug "on waiting list" +} -start +logexpect l3 -v v1 -g raw { + expect * * ReqHeader "User-Agent: c3" + expect * = Debug "on waiting list" +} -start +logexpect l4 -v v1 -g raw { + expect * * ReqHeader "User-Agent: c4" + expect * = Debug "on waiting list" +} -start +logexpect l5 -v v1 -g raw { + expect * * ReqHeader "User-Agent: c5" + expect * = Debug "on waiting list" +} -start +logexpect l6 -v v1 -g raw { + expect * * ReqHeader "User-Agent: c6" + expect * = Debug "on waiting list" +} -start + client c2 { txreq rxresp } -start +logexpect l2 -wait + client c3 { txreq rxresp } -start +logexpect l3 -wait + client c4 { txreq rxresp } -start +logexpect l4 -wait + client c5 { txreq rxresp } -start +logexpect l5 -wait + client c6 { txreq rxresp } -start -# Wait until c2-c6 are on the waitinglist +logexpect l6 -wait + varnish v1 -vsl_catchup varnish v1 -expect busy_sleep == 5 @@ -135,3 +169,19 @@ client c3 -wait client c4 -wait client c5 -wait client c6 -wait + +varnish v1 -vsl_catchup + +# Check the effect of rush_exponent=2, with limited VXID guarantees. + +logexpect l1 -v v1 -g raw -d 1 -q "vxid != 0" -i Debug { + expect * 1002 Debug "waiting list rush for req 1004" + expect 0 = Debug "waiting list rush for req 1006" + + # triggered by 1004 or 1006 + expect * * Debug "waiting list rush for req 1008" + expect 0 = Debug "waiting list rush for req 1010" + + # trigerred by any VXID except 1002 + expect * * Debug "waiting list rush for req 1012" +} -run From 05d90529becc60ba6c48f0fe813b9609e58866db Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Mon, 2 Oct 2023 12:13:24 +0200 Subject: [PATCH 13/14] hash: Retire the HSH_DerefObjCore(rushmax) argument Now that waiting lists are owned by objcores, the rush policy no longer needs to leak everywhere. Either all the waiters are rushed at once for cacheable objects, or according to rush_exponent. 
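For intuition, the effect of the two policies can be sketched with a small self-contained program. This is a toy model, not Varnish code: the RUSH_EXPONENT constant (2, matching the value set in the test above) and the round-based accounting are assumptions made only for the example; in Varnish the actual wake-ups happen as the fetch task unbusies the object and as each rushed request drops its reference.

    #include <limits.h>
    #include <stdio.h>

    #define RUSH_EXPONENT 2 /* mirrors "param.set rush_exponent 2" above */

    /* Toy model: a cacheable objcore releases its whole waiting list at
     * once; an uncacheable one wakes RUSH_EXPONENT requests and each of
     * those wakes RUSH_EXPONENT more, yielding an exponential cascade.
     */
    static void
    simulate(int waiters, int cacheable)
    {
            int max, awake, woken, round, i, n;

            max = cacheable ? INT_MAX : RUSH_EXPONENT;
            awake = 1;      /* the task that completed the fetch */
            round = 0;
            while (waiters > 0) {
                    woken = 0;
                    for (i = 0; i < awake && waiters > 0; i++) {
                            n = waiters < max ? waiters : max;
                            waiters -= n;
                            woken += n;
                    }
                    printf("round %d: woke %d, %d still waiting\n",
                        ++round, woken, waiters);
                    awake = woken;
            }
    }

    int
    main(void)
    {
            printf("uncacheable object, 5 waiters:\n");
            simulate(5, 0);
            printf("cacheable object, 5 waiters:\n");
            simulate(5, 1);
            return (0);
    }

With five waiters and rush_exponent=2, the uncacheable case wakes two requests first and the remaining three in the next round, which is the cascade asserted by the logexpect checks added to c00099.vtc above; the cacheable case wakes all five at once.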
--- bin/varnishd/cache/cache_ban_lurker.c | 2 +- bin/varnishd/cache/cache_busyobj.c | 6 ++-- bin/varnishd/cache/cache_expire.c | 6 ++-- bin/varnishd/cache/cache_fetch.c | 10 +++---- bin/varnishd/cache/cache_hash.c | 30 +++++++------------ bin/varnishd/cache/cache_objhead.h | 3 +- bin/varnishd/cache/cache_req_body.c | 10 +++---- bin/varnishd/cache/cache_req_fsm.c | 22 +++++++------- bin/varnishd/cache/cache_vrt_var.c | 2 +- bin/varnishd/storage/storage_lru.c | 2 +- .../storage/storage_persistent_silo.c | 2 +- include/tbl/params.h | 2 +- 12 files changed, 43 insertions(+), 54 deletions(-) diff --git a/bin/varnishd/cache/cache_ban_lurker.c b/bin/varnishd/cache/cache_ban_lurker.c index 76ac772b9f7..311ec31d7fe 100644 --- a/bin/varnishd/cache/cache_ban_lurker.c +++ b/bin/varnishd/cache/cache_ban_lurker.c @@ -332,7 +332,7 @@ ban_lurker_test_ban(struct worker *wrk, struct vsl_log *vsl, struct ban *bt, if (i) ObjSendEvent(wrk, oc, OEV_BANCHG); } - (void)HSH_DerefObjCore(wrk, &oc, 0); + (void)HSH_DerefObjCore(wrk, &oc); } } diff --git a/bin/varnishd/cache/cache_busyobj.c b/bin/varnishd/cache/cache_busyobj.c index 9a5fe36d11a..17ef9b26738 100644 --- a/bin/varnishd/cache/cache_busyobj.c +++ b/bin/varnishd/cache/cache_busyobj.c @@ -175,10 +175,8 @@ VBO_ReleaseBusyObj(struct worker *wrk, struct busyobj **pbo) if (WS_Overflowed(bo->ws)) wrk->stats->ws_backend_overflow++; - if (bo->fetch_objcore != NULL) { - (void)HSH_DerefObjCore(wrk, &bo->fetch_objcore, - HSH_RUSH_POLICY); - } + if (bo->fetch_objcore != NULL) + (void)HSH_DerefObjCore(wrk, &bo->fetch_objcore); VRT_Assign_Backend(&bo->director_req, NULL); VRT_Assign_Backend(&bo->director_resp, NULL); diff --git a/bin/varnishd/cache/cache_expire.c b/bin/varnishd/cache/cache_expire.c index 0392d191d09..f1122ec34df 100644 --- a/bin/varnishd/cache/cache_expire.c +++ b/bin/varnishd/cache/cache_expire.c @@ -209,7 +209,7 @@ EXP_Insert(struct worker *wrk, struct objcore *oc) ObjSendEvent(wrk, oc, OEV_EXPIRE); tmpoc = oc; assert(oc->refcnt >= 2); /* Silence coverity */ - (void)HSH_DerefObjCore(wrk, &oc, 0); + (void)HSH_DerefObjCore(wrk, &oc); AZ(oc); assert(tmpoc->refcnt >= 1); /* Silence coverity */ } @@ -280,7 +280,7 @@ exp_inbox(struct exp_priv *ep, struct objcore *oc, unsigned flags) assert(oc->refcnt > 0); AZ(oc->exp_flags); ObjSendEvent(ep->wrk, oc, OEV_EXPIRE); - (void)HSH_DerefObjCore(ep->wrk, &oc, 0); + (void)HSH_DerefObjCore(ep->wrk, &oc); return; } @@ -357,7 +357,7 @@ exp_expire(struct exp_priv *ep, vtim_real now) VSLb(&ep->vsl, SLT_ExpKill, "EXP_Expired xid=%ju t=%.0f", VXID(ObjGetXID(ep->wrk, oc)), EXP_Ttl(NULL, oc) - now); ObjSendEvent(ep->wrk, oc, OEV_EXPIRE); - (void)HSH_DerefObjCore(ep->wrk, &oc, 0); + (void)HSH_DerefObjCore(ep->wrk, &oc); } return (0); } diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c index 995568a1820..ebe5b0b73c7 100644 --- a/bin/varnishd/cache/cache_fetch.c +++ b/bin/varnishd/cache/cache_fetch.c @@ -1058,7 +1058,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv) CHECK_OBJ_NOTNULL(bo->stale_oc, OBJCORE_MAGIC); /* We don't want the oc/stevedore ops in fetching thread */ if (!ObjCheckFlag(wrk, bo->stale_oc, OF_IMSCAND)) - (void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0); + (void)HSH_DerefObjCore(wrk, &bo->stale_oc); } #endif @@ -1085,7 +1085,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv) http_Teardown(bo->beresp); // can not make assumptions about the number of references here #3434 if (bo->bereq_body != NULL) - (void) HSH_DerefObjCore(bo->wrk, &bo->bereq_body, 0); + (void) 
HSH_DerefObjCore(bo->wrk, &bo->bereq_body); if (oc->boc->state == BOS_FINISHED) { AZ(oc->flags & OC_F_FAILED); @@ -1095,7 +1095,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv) // AZ(oc->boc); // XXX if (bo->stale_oc != NULL) - (void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0); + (void)HSH_DerefObjCore(wrk, &bo->stale_oc); wrk->vsl = NULL; HSH_DerefBoc(wrk, oc); @@ -1184,7 +1184,7 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc, wrk->stats->bgfetch_no_thread++; (void)vbf_stp_fail(req->wrk, bo); if (bo->stale_oc != NULL) - (void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0); + (void)HSH_DerefObjCore(wrk, &bo->stale_oc); HSH_DerefBoc(wrk, oc); SES_Rel(bo->sp); THR_SetBusyobj(NULL); @@ -1209,5 +1209,5 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc, assert(oc->boc == boc); HSH_DerefBoc(wrk, oc); if (mode == VBF_BACKGROUND) - (void)HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &oc); } diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 5553d9f2bca..3fb8ffff2af 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -75,8 +75,7 @@ struct rush { static const struct hash_slinger *hash; static struct objhead *private_oh; -static void hsh_rush1(const struct worker *, struct objcore *, - struct rush *, int); +static void hsh_rush1(const struct worker *, struct objcore *, struct rush *); static void hsh_rush2(struct worker *, struct rush *); static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh); static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh); @@ -649,7 +648,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); AZ(oc->flags); hsh_rush_move(oc, rush_oc); - hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); + hsh_rush1(wrk, oc, &rush); assert(VTAILQ_EMPTY(&oc->waitinglist)); break; case HSH_BUSY: @@ -763,7 +762,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (rush_oc != NULL) { hsh_rush2(wrk, &rush); - (void)HSH_DerefObjCore(wrk, &rush_oc, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &rush_oc); } return (lr); @@ -774,21 +773,14 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) */ static void -hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r, int max) +hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r) { - int i; + int max, i; unsigned xid = 0; struct req *req; - if (max == 0) - return; - if (max == HSH_RUSH_POLICY) { - AZ(oc->flags & OC_F_BUSY); - if (oc->flags != 0) - max = cache_param->rush_exponent; - else - max = INT_MAX; - } + AZ(oc->flags & OC_F_BUSY); + max = oc->flags != 0 ? 
cache_param->rush_exponent : INT_MAX; assert(max > 0); CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); @@ -927,7 +919,7 @@ HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now, for (i = 0; i < j; i++) { CHECK_OBJ_NOTNULL(ocp[i], OBJCORE_MAGIC); EXP_Rearm(ocp[i], ttl_now, ttl, grace, keep); - (void)HSH_DerefObjCore(wrk, &ocp[i], 0); + (void)HSH_DerefObjCore(wrk, &ocp[i]); AZ(ocp[i]); total++; } @@ -1073,7 +1065,7 @@ HSH_Unbusy(struct worker *wrk, struct objcore *oc) VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list); oc->flags &= ~OC_F_BUSY; - hsh_rush1(wrk, oc, &rush, HSH_RUSH_POLICY); + hsh_rush1(wrk, oc, &rush); Lck_Unlock(&oh->mtx); EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore was * called */ @@ -1204,7 +1196,7 @@ HSH_DerefBoc(struct worker *wrk, struct objcore *oc) */ int -HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax) +HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp) { struct objcore *oc; struct objhead *oh; @@ -1226,7 +1218,7 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax) VTAILQ_REMOVE(&oh->objcs, oc, hsh_list); if (!VTAILQ_EMPTY(&oc->waitinglist)) { assert(oc->refcnt > 0); - hsh_rush1(wrk, oc, &rush, rushmax); + hsh_rush1(wrk, oc, &rush); } Lck_Unlock(&oh->mtx); hsh_rush2(wrk, &rush); diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h index c4f0366bf12..50d3823ad63 100644 --- a/bin/varnishd/cache/cache_objhead.h +++ b/bin/varnishd/cache/cache_objhead.h @@ -76,8 +76,7 @@ struct boc *HSH_RefBoc(const struct objcore *); void HSH_DerefBoc(struct worker *wrk, struct objcore *); void HSH_DeleteObjHead(const struct worker *, struct objhead *); -int HSH_DerefObjCore(struct worker *, struct objcore **, int rushmax); -#define HSH_RUSH_POLICY -1 +int HSH_DerefObjCore(struct worker *, struct objcore **); enum lookup_e HSH_Lookup(struct req *, struct objcore **, struct objcore **); void HSH_Ref(struct objcore *o); diff --git a/bin/varnishd/cache/cache_req_body.c b/bin/varnishd/cache/cache_req_body.c index 3cbd627348f..2767feda927 100644 --- a/bin/varnishd/cache/cache_req_body.c +++ b/bin/varnishd/cache/cache_req_body.c @@ -81,7 +81,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv) if (STV_NewObject(req->wrk, req->body_oc, stv, hint) == 0) { req->req_body_status = BS_ERROR; HSH_DerefBoc(req->wrk, req->body_oc); - AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0)); + AZ(HSH_DerefObjCore(req->wrk, &req->body_oc)); (void)VFP_Error(vfc, "Object allocation failed:" " Ran out of space in %s", stv->vclname); return (-1); @@ -95,7 +95,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv) if (VFP_Open(ctx, vfc) < 0) { req->req_body_status = BS_ERROR; HSH_DerefBoc(req->wrk, req->body_oc); - AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0)); + AZ(HSH_DerefObjCore(req->wrk, &req->body_oc)); return (-1); } @@ -141,7 +141,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv) VSLb_ts_req(req, "ReqBody", VTIM_real()); if (func != NULL) { HSH_DerefBoc(req->wrk, req->body_oc); - AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0)); + AZ(HSH_DerefObjCore(req->wrk, &req->body_oc)); if (vfps != VFP_END) { req->req_body_status = BS_ERROR; if (r == 0) @@ -155,7 +155,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv) if (vfps != VFP_END) { req->req_body_status = BS_ERROR; - AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0)); + AZ(HSH_DerefObjCore(req->wrk, 
&req->body_oc)); return (-1); } @@ -276,7 +276,7 @@ VRB_Free(struct req *req) if (req->body_oc == NULL) return; - r = HSH_DerefObjCore(req->wrk, &req->body_oc, 0); + r = HSH_DerefObjCore(req->wrk, &req->body_oc); // each busyobj may have gained a reference assert (r >= 0); diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index c4b986460dd..80b933792dc 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -219,7 +219,7 @@ cnt_deliver(struct worker *wrk, struct req *req) ObjTouch(req->wrk, req->objcore, req->t_prev); if (Resp_Setup_Deliver(req)) { - (void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &req->objcore); req->err_code = 500; req->req_step = R_STP_SYNTH; return (REQ_FSM_MORE); @@ -239,7 +239,7 @@ cnt_deliver(struct worker *wrk, struct req *req) if (wrk->vpi->handling != VCL_RET_DELIVER) { HSH_Cancel(wrk, req->objcore, NULL); - (void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &req->objcore); http_Teardown(req->resp); switch (wrk->vpi->handling) { @@ -400,7 +400,7 @@ cnt_synth(struct worker *wrk, struct req *req) VSLb(req->vsl, SLT_Error, "Could not get storage"); req->doclose = SC_OVERLOAD; VSLb_ts_req(req, "Resp", W_TIM_real(wrk)); - (void)HSH_DerefObjCore(wrk, &req->objcore, 1); + (void)HSH_DerefObjCore(wrk, &req->objcore); http_Teardown(req->resp); return (REQ_FSM_DONE); } @@ -499,7 +499,7 @@ cnt_transmit(struct worker *wrk, struct req *req) if (boc != NULL) HSH_DerefBoc(wrk, req->objcore); - (void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &req->objcore); http_Teardown(req->resp); req->filter_list = NULL; @@ -526,7 +526,7 @@ cnt_fetch(struct worker *wrk, struct req *req) if (req->objcore->flags & OC_F_FAILED) { req->err_code = 503; req->req_step = R_STP_SYNTH; - (void)HSH_DerefObjCore(wrk, &req->objcore, 1); + (void)HSH_DerefObjCore(wrk, &req->objcore); AZ(req->objcore); return (REQ_FSM_MORE); } @@ -661,10 +661,10 @@ cnt_lookup(struct worker *wrk, struct req *req) } /* Drop our object, we won't need it */ - (void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &req->objcore); if (busy != NULL) { - (void)HSH_DerefObjCore(wrk, &busy, 0); + (void)HSH_DerefObjCore(wrk, &busy); VRY_Clear(req); } @@ -691,7 +691,7 @@ cnt_miss(struct worker *wrk, struct req *req) wrk->stats->cache_miss++; VBF_Fetch(wrk, req, req->objcore, req->stale_oc, VBF_NORMAL); if (req->stale_oc != NULL) - (void)HSH_DerefObjCore(wrk, &req->stale_oc, 0); + (void)HSH_DerefObjCore(wrk, &req->stale_oc); req->req_step = R_STP_FETCH; return (REQ_FSM_MORE); case VCL_RET_FAIL: @@ -711,8 +711,8 @@ cnt_miss(struct worker *wrk, struct req *req) } VRY_Clear(req); if (req->stale_oc != NULL) - (void)HSH_DerefObjCore(wrk, &req->stale_oc, 0); - AZ(HSH_DerefObjCore(wrk, &req->objcore, 1)); + (void)HSH_DerefObjCore(wrk, &req->stale_oc); + AZ(HSH_DerefObjCore(wrk, &req->objcore)); return (REQ_FSM_MORE); } @@ -1081,7 +1081,7 @@ cnt_purge(struct worker *wrk, struct req *req) (void)HSH_Purge(wrk, boc->objhead, req->t_req, 0, 0, 0); - AZ(HSH_DerefObjCore(wrk, &boc, 1)); + AZ(HSH_DerefObjCore(wrk, &boc)); VCL_purge_method(req->vcl, wrk, req, NULL, NULL); switch (wrk->vpi->handling) { diff --git a/bin/varnishd/cache/cache_vrt_var.c b/bin/varnishd/cache/cache_vrt_var.c index 8810fcc513d..3f1f3dd05d0 100644 --- a/bin/varnishd/cache/cache_vrt_var.c +++ b/bin/varnishd/cache/cache_vrt_var.c @@ -614,7 +614,7 @@ 
VRT_u_bereq_body(VRT_CTX) CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC); if (ctx->bo->bereq_body != NULL) { - (void)HSH_DerefObjCore(ctx->bo->wrk, &ctx->bo->bereq_body, 0); + (void)HSH_DerefObjCore(ctx->bo->wrk, &ctx->bo->bereq_body); http_Unset(ctx->bo->bereq, H_Content_Length); } diff --git a/bin/varnishd/storage/storage_lru.c b/bin/varnishd/storage/storage_lru.c index 5e046e79468..ec0369c8173 100644 --- a/bin/varnishd/storage/storage_lru.c +++ b/bin/varnishd/storage/storage_lru.c @@ -205,6 +205,6 @@ LRU_NukeOne(struct worker *wrk, struct lru *lru) ObjSlim(wrk, oc); VSLb(wrk->vsl, SLT_ExpKill, "LRU xid=%ju", VXID(ObjGetXID(wrk, oc))); - (void)HSH_DerefObjCore(wrk, &oc, 0); // Ref from HSH_Snipe + (void)HSH_DerefObjCore(wrk, &oc); // Ref from HSH_Snipe return (1); } diff --git a/bin/varnishd/storage/storage_persistent_silo.c b/bin/varnishd/storage/storage_persistent_silo.c index 81c24b4367f..eb96acd3adb 100644 --- a/bin/varnishd/storage/storage_persistent_silo.c +++ b/bin/varnishd/storage/storage_persistent_silo.c @@ -178,7 +178,7 @@ smp_load_seg(struct worker *wrk, const struct smp_sc *sc, HSH_Insert(wrk, so->hash, oc, ban); AN(oc->ban); HSH_DerefBoc(wrk, oc); // XXX Keep it an stream resurrection? - (void)HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY); + (void)HSH_DerefObjCore(wrk, &oc); wrk->stats->n_vampireobject++; } Pool_Sumstat(wrk); diff --git a/include/tbl/params.h b/include/tbl/params.h index 83054eef3de..c64b22d64d3 100644 --- a/include/tbl/params.h +++ b/include/tbl/params.h @@ -800,7 +800,7 @@ PARAM_SIMPLE( /* descr */ "How many parked request we start for each completed request on " "the object.\n" - "NB: Even with the implict delay of delivery, this parameter " + "NB: Even with the implicit delay of delivery, this parameter " "controls an exponential increase in number of worker threads.", /* flags */ EXPERIMENTAL ) From 6fe0207fa756cc565c61ad0d1fd24cd7526b518e Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Mon, 2 Oct 2023 14:08:32 +0200 Subject: [PATCH 14/14] hash: Group rush functions together --- bin/varnishd/cache/cache_hash.c | 150 ++++++++++++++++---------------- 1 file changed, 74 insertions(+), 76 deletions(-) diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 3fb8ffff2af..0d729288bfe 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -75,8 +75,6 @@ struct rush { static const struct hash_slinger *hash; static struct objhead *private_oh; -static void hsh_rush1(const struct worker *, struct objcore *, struct rush *); -static void hsh_rush2(struct worker *, struct rush *); static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh); static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh); @@ -347,6 +345,80 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh) return (oc); } +/*--------------------------------------------------------------------- + * Pick the req's we are going to rush from the waiting list + */ + +static void +hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r) +{ + int max, i; + unsigned xid = 0; + struct req *req; + + AZ(oc->flags & OC_F_BUSY); + max = oc->flags != 0 ? 
cache_param->rush_exponent : INT_MAX; + assert(max > 0); + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); + CHECK_OBJ_NOTNULL(r, RUSH_MAGIC); + VTAILQ_INIT(&r->reqs); + Lck_AssertHeld(&oc->objhead->mtx); + for (i = 0; i < max; i++) { + req = VTAILQ_FIRST(&oc->waitinglist); + if (req == NULL) + break; + + if (DO_DEBUG(DBG_WAITINGLIST)) { + xid = VXID(req->vsl->wid); + VSLb(wrk->vsl, SLT_Debug, + "waiting list rush for req %u", xid); + } + + assert(oc->refcnt > 0); + oc->refcnt++; + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + wrk->stats->busy_wakeup++; + AZ(req->wrk); + VTAILQ_REMOVE(&oc->waitinglist, req, w_list); + VTAILQ_INSERT_TAIL(&r->reqs, req, w_list); + req->waitinglist = 0; + } +} + +/*--------------------------------------------------------------------- + * Rush req's that came from waiting list. + */ + +static void +hsh_rush2(struct worker *wrk, struct rush *r) +{ + struct req *req; + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(r, RUSH_MAGIC); + + while (!VTAILQ_EMPTY(&r->reqs)) { + req = VTAILQ_FIRST(&r->reqs); + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + VTAILQ_REMOVE(&r->reqs, req, w_list); + DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list"); + if (req->transport->reembark != NULL) { + // For ESI includes + req->transport->reembark(wrk, req); + } else { + /* + * We ignore the queue limits which apply to new + * requests because if we fail to reschedule there + * may be vmod_privs to cleanup and we need a proper + * workerthread for that. + */ + AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH)); + } + } +} + /*--------------------------------------------------------------------- */ @@ -768,80 +840,6 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) return (lr); } -/*--------------------------------------------------------------------- - * Pick the req's we are going to rush from the waiting list - */ - -static void -hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r) -{ - int max, i; - unsigned xid = 0; - struct req *req; - - AZ(oc->flags & OC_F_BUSY); - max = oc->flags != 0 ? cache_param->rush_exponent : INT_MAX; - assert(max > 0); - - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC); - CHECK_OBJ_NOTNULL(r, RUSH_MAGIC); - VTAILQ_INIT(&r->reqs); - Lck_AssertHeld(&oc->objhead->mtx); - for (i = 0; i < max; i++) { - req = VTAILQ_FIRST(&oc->waitinglist); - if (req == NULL) - break; - - if (DO_DEBUG(DBG_WAITINGLIST)) { - xid = VXID(req->vsl->wid); - VSLb(wrk->vsl, SLT_Debug, - "waiting list rush for req %u", xid); - } - - assert(oc->refcnt > 0); - oc->refcnt++; - CHECK_OBJ_NOTNULL(req, REQ_MAGIC); - wrk->stats->busy_wakeup++; - AZ(req->wrk); - VTAILQ_REMOVE(&oc->waitinglist, req, w_list); - VTAILQ_INSERT_TAIL(&r->reqs, req, w_list); - req->waitinglist = 0; - } -} - -/*--------------------------------------------------------------------- - * Rush req's that came from waiting list. 
- */ - -static void -hsh_rush2(struct worker *wrk, struct rush *r) -{ - struct req *req; - - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(r, RUSH_MAGIC); - - while (!VTAILQ_EMPTY(&r->reqs)) { - req = VTAILQ_FIRST(&r->reqs); - CHECK_OBJ_NOTNULL(req, REQ_MAGIC); - VTAILQ_REMOVE(&r->reqs, req, w_list); - DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list"); - if (req->transport->reembark != NULL) { - // For ESI includes - req->transport->reembark(wrk, req); - } else { - /* - * We ignore the queue limits which apply to new - * requests because if we fail to reschedule there - * may be vmod_privs to cleanup and we need a proper - * workerthread for that. - */ - AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH)); - } - } -} - /*--------------------------------------------------------------------- * Purge an entire objhead */
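
For context, the relocated functions keep their existing split: hsh_rush1()
collects waiters while the objhead lock is held, and hsh_rush2() reschedules
them once the lock is released. A condensed illustration of the caller
pattern seen in the hunks above (e.g. HSH_DerefObjCore(), HSH_Unbusy());
this is not part of the patch and setup details are assumed:

	struct rush rush;

	INIT_OBJ(&rush, RUSH_MAGIC);		/* assumed caller-side setup */
	Lck_Lock(&oh->mtx);
	/* ... update the objcore under the lock ... */
	if (!VTAILQ_EMPTY(&oc->waitinglist))
		hsh_rush1(wrk, oc, &rush);	/* pick waiters, lock held */
	Lck_Unlock(&oh->mtx);
	hsh_rush2(wrk, &rush);			/* reschedule them off the lock */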