-
-
Notifications
You must be signed in to change notification settings - Fork 904
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ext: backport libxml2/gnome@bf5fcf6e for xmlXPathContext perf
See extended discussion at #3378 Benchmark comparing this commit against v1.17.x ("main"): Comparison: large: main: 3910.6 i/s large: patched: 3759.6 i/s - same-ish: difference falls within error Comparison: small: patched: 242901.7 i/s small: main: 127486.0 i/s - 1.91x slower I think we could get greater performance gains by re-using XPathContext objects, but only at the cost of a significant amount of additional complexity, since in order to properly support recursive XPath evaluation, Nokogiri would have to push and pop "stack frames" containing: - internal state contextSize and proximityPosition - registered namespaces - registered variables - function lookup handler That feels like a lot of code for a small win. Comparatively, pulling in this upstream patch is still a 2x speedup for zero additional complexity.
- Loading branch information
1 parent
47e89a3
commit 3f20dd9
Showing
2 changed files
with
245 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
244 changes: 244 additions & 0 deletions
244
patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
From d3e3526111097560cf7c002613e2cb1d469b59e0 Mon Sep 17 00:00:00 2001 | ||
From: Nick Wellnhofer <wellnhofer@aevum.de> | ||
Date: Sat, 21 Dec 2024 16:03:46 +0100 | ||
Subject: [PATCH] xpath: Use separate static hash table for standard functions | ||
|
||
This avoids registering standard functions when creating an XPath | ||
context. | ||
|
||
Lookup of extension functions is a bit slower now, but ultimately, all | ||
function lookups should be moved to the compilation phase. | ||
|
||
(cherry picked from commit bf5fcf6e646bb51a0f6a3655a1d64bea97274867) | ||
--- | ||
xpath.c | 170 ++++++++++++++++++++++++++++++++------------------------ | ||
1 file changed, 98 insertions(+), 72 deletions(-) | ||
|
||
diff --git a/xpath.c b/xpath.c | ||
index 485d7747..21711653 100644 | ||
--- a/xpath.c | ||
+++ b/xpath.c | ||
@@ -136,11 +136,48 @@ | ||
|
||
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) | ||
|
||
-/************************************************************************ | ||
- * * | ||
- * Floating point stuff * | ||
- * * | ||
- ************************************************************************/ | ||
+static void | ||
+xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs); | ||
+ | ||
+static const struct { | ||
+ const char *name; | ||
+ xmlXPathFunction func; | ||
+} xmlXPathStandardFunctions[] = { | ||
+ { "boolean", xmlXPathBooleanFunction }, | ||
+ { "ceiling", xmlXPathCeilingFunction }, | ||
+ { "count", xmlXPathCountFunction }, | ||
+ { "concat", xmlXPathConcatFunction }, | ||
+ { "contains", xmlXPathContainsFunction }, | ||
+ { "id", xmlXPathIdFunction }, | ||
+ { "false", xmlXPathFalseFunction }, | ||
+ { "floor", xmlXPathFloorFunction }, | ||
+ { "last", xmlXPathLastFunction }, | ||
+ { "lang", xmlXPathLangFunction }, | ||
+ { "local-name", xmlXPathLocalNameFunction }, | ||
+ { "not", xmlXPathNotFunction }, | ||
+ { "name", xmlXPathNameFunction }, | ||
+ { "namespace-uri", xmlXPathNamespaceURIFunction }, | ||
+ { "normalize-space", xmlXPathNormalizeFunction }, | ||
+ { "number", xmlXPathNumberFunction }, | ||
+ { "position", xmlXPathPositionFunction }, | ||
+ { "round", xmlXPathRoundFunction }, | ||
+ { "string", xmlXPathStringFunction }, | ||
+ { "string-length", xmlXPathStringLengthFunction }, | ||
+ { "starts-with", xmlXPathStartsWithFunction }, | ||
+ { "substring", xmlXPathSubstringFunction }, | ||
+ { "substring-before", xmlXPathSubstringBeforeFunction }, | ||
+ { "substring-after", xmlXPathSubstringAfterFunction }, | ||
+ { "sum", xmlXPathSumFunction }, | ||
+ { "true", xmlXPathTrueFunction }, | ||
+ { "translate", xmlXPathTranslateFunction } | ||
+}; | ||
+ | ||
+#define NUM_STANDARD_FUNCTIONS \ | ||
+ (sizeof(xmlXPathStandardFunctions) / sizeof(xmlXPathStandardFunctions[0])) | ||
+ | ||
+#define SF_HASH_SIZE 64 | ||
+ | ||
+static unsigned char xmlXPathSFHash[SF_HASH_SIZE]; | ||
|
||
double xmlXPathNAN = 0.0; | ||
double xmlXPathPINF = 0.0; | ||
@@ -156,6 +193,18 @@ xmlXPathInit(void) { | ||
xmlInitParser(); | ||
} | ||
|
||
+ATTRIBUTE_NO_SANITIZE_INTEGER | ||
+static unsigned | ||
+xmlXPathSFComputeHash(const xmlChar *name) { | ||
+ unsigned hashValue = 5381; | ||
+ const xmlChar *ptr; | ||
+ | ||
+ for (ptr = name; *ptr; ptr++) | ||
+ hashValue = hashValue * 33 + *ptr; | ||
+ | ||
+ return(hashValue); | ||
+} | ||
+ | ||
/** | ||
* xmlInitXPathInternal: | ||
* | ||
@@ -164,6 +213,8 @@ xmlXPathInit(void) { | ||
ATTRIBUTE_NO_SANITIZE("float-divide-by-zero") | ||
void | ||
xmlInitXPathInternal(void) { | ||
+ size_t i; | ||
+ | ||
#if defined(NAN) && defined(INFINITY) | ||
xmlXPathNAN = NAN; | ||
xmlXPathPINF = INFINITY; | ||
@@ -175,8 +226,34 @@ xmlInitXPathInternal(void) { | ||
xmlXPathPINF = 1.0 / zero; | ||
xmlXPathNINF = -xmlXPathPINF; | ||
#endif | ||
+ | ||
+ /* | ||
+ * Initialize hash table for standard functions | ||
+ */ | ||
+ | ||
+ for (i = 0; i < SF_HASH_SIZE; i++) | ||
+ xmlXPathSFHash[i] = UCHAR_MAX; | ||
+ | ||
+ for (i = 0; i < NUM_STANDARD_FUNCTIONS; i++) { | ||
+ const char *name = xmlXPathStandardFunctions[i].name; | ||
+ int bucketIndex = xmlXPathSFComputeHash(BAD_CAST name) % SF_HASH_SIZE; | ||
+ | ||
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { | ||
+ bucketIndex += 1; | ||
+ if (bucketIndex >= SF_HASH_SIZE) | ||
+ bucketIndex = 0; | ||
+ } | ||
+ | ||
+ xmlXPathSFHash[bucketIndex] = i; | ||
+ } | ||
} | ||
|
||
+/************************************************************************ | ||
+ * * | ||
+ * Floating point stuff * | ||
+ * * | ||
+ ************************************************************************/ | ||
+ | ||
/** | ||
* xmlXPathIsNaN: | ||
* @val: a double value | ||
@@ -3979,18 +4056,6 @@ xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, | ||
*/ | ||
xmlXPathFunction | ||
xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { | ||
- if (ctxt == NULL) | ||
- return (NULL); | ||
- | ||
- if (ctxt->funcLookupFunc != NULL) { | ||
- xmlXPathFunction ret; | ||
- xmlXPathFuncLookupFunc f; | ||
- | ||
- f = ctxt->funcLookupFunc; | ||
- ret = f(ctxt->funcLookupData, name, NULL); | ||
- if (ret != NULL) | ||
- return(ret); | ||
- } | ||
return(xmlXPathFunctionLookupNS(ctxt, name, NULL)); | ||
} | ||
|
||
@@ -4015,6 +4080,22 @@ xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, | ||
if (name == NULL) | ||
return(NULL); | ||
|
||
+ if (ns_uri == NULL) { | ||
+ int bucketIndex = xmlXPathSFComputeHash(name) % SF_HASH_SIZE; | ||
+ | ||
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { | ||
+ int funcIndex = xmlXPathSFHash[bucketIndex]; | ||
+ | ||
+ if (strcmp(xmlXPathStandardFunctions[funcIndex].name, | ||
+ (char *) name) == 0) | ||
+ return(xmlXPathStandardFunctions[funcIndex].func); | ||
+ | ||
+ bucketIndex += 1; | ||
+ if (bucketIndex >= SF_HASH_SIZE) | ||
+ bucketIndex = 0; | ||
+ } | ||
+ } | ||
+ | ||
if (ctxt->funcLookupFunc != NULL) { | ||
xmlXPathFuncLookupFunc f; | ||
|
||
@@ -13494,61 +13575,6 @@ xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) { | ||
void | ||
xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt) | ||
{ | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean", | ||
- xmlXPathBooleanFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling", | ||
- xmlXPathCeilingFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count", | ||
- xmlXPathCountFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat", | ||
- xmlXPathConcatFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains", | ||
- xmlXPathContainsFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id", | ||
- xmlXPathIdFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false", | ||
- xmlXPathFalseFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor", | ||
- xmlXPathFloorFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last", | ||
- xmlXPathLastFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang", | ||
- xmlXPathLangFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name", | ||
- xmlXPathLocalNameFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not", | ||
- xmlXPathNotFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name", | ||
- xmlXPathNameFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri", | ||
- xmlXPathNamespaceURIFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space", | ||
- xmlXPathNormalizeFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number", | ||
- xmlXPathNumberFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position", | ||
- xmlXPathPositionFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round", | ||
- xmlXPathRoundFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string", | ||
- xmlXPathStringFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length", | ||
- xmlXPathStringLengthFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with", | ||
- xmlXPathStartsWithFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring", | ||
- xmlXPathSubstringFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before", | ||
- xmlXPathSubstringBeforeFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after", | ||
- xmlXPathSubstringAfterFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum", | ||
- xmlXPathSumFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true", | ||
- xmlXPathTrueFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate", | ||
- xmlXPathTranslateFunction); | ||
- | ||
xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri", | ||
(const xmlChar *)"http://www.w3.org/2002/08/xquery-functions", | ||
xmlXPathEscapeUriFunction); | ||
-- | ||
2.47.1 | ||
|