Skip to content

Commit

Permalink
ext: backport libxml2/gnome@bf5fcf6e for xmlXPathContext perf
Browse files Browse the repository at this point in the history
See extended discussion at #3378

Benchmark comparing this commit against v1.17.x ("main"):

  Comparison:
           large: main:     3910.6 i/s
        large: patched:     3759.6 i/s - same-ish: difference falls within error

  Comparison:
        small: patched:   242901.7 i/s
           small: main:   127486.0 i/s - 1.91x  slower

I think we could get greater performance gains by re-using
XPathContext objects, but only at the cost of a significant amount of
additional complexity, since in order to properly support recursive
XPath evaluation, Nokogiri would have to push and pop "stack frames"
containing:

- internal state contextSize and proximityPosition
- registered namespaces
- registered variables
- function lookup handler

That feels like a lot of code for a small win. By comparison, pulling
in this upstream patch still yields a roughly 2x speedup (1.91x in the "small" benchmark above) with zero additional complexity.
  • Loading branch information
flavorjones committed Dec 21, 2024
1 parent 47e89a3 commit 3f20dd9
Show file tree
Hide file tree
Showing 2 changed files with 245 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ This release drops precompiled native platform gems for `x86-linux` and `x86-min

### Improved

-* [CRuby] CSS and XPath queries are faster now that `Node#xpath`, `Node#css`, and related functions are re-using the underlying xpath context object (which is expensive to initialize). We benchmarked a 2.8x improvement for a 6kb file, and a more modest 1.3x improvement for a 70kb file. (#3378) @flavorjones
+* [CRuby] CSS and XPath queries are faster now that `Node#xpath`, `Node#css`, and related functions are using a faster XPathContext initialization process. We benchmarked a 1.9x improvement for a 6kb file. Big thanks to @nwellnhof for helping with this one. (#3378, superseded by #3389) @flavorjones


## v1.17.2 / 2024-12-12
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
From d3e3526111097560cf7c002613e2cb1d469b59e0 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sat, 21 Dec 2024 16:03:46 +0100
Subject: [PATCH] xpath: Use separate static hash table for standard functions

This avoids registering standard functions when creating an XPath
context.

Lookup of extension functions is a bit slower now, but ultimately, all
function lookups should be moved to the compilation phase.

(cherry picked from commit bf5fcf6e646bb51a0f6a3655a1d64bea97274867)
---
xpath.c | 170 ++++++++++++++++++++++++++++++++------------------------
1 file changed, 98 insertions(+), 72 deletions(-)

diff --git a/xpath.c b/xpath.c
index 485d7747..21711653 100644
--- a/xpath.c
+++ b/xpath.c
@@ -136,11 +136,48 @@

#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)

-/************************************************************************
- * *
- * Floating point stuff *
- * *
- ************************************************************************/
+static void
+xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs);
+
+static const struct {
+ const char *name;
+ xmlXPathFunction func;
+} xmlXPathStandardFunctions[] = {
+ { "boolean", xmlXPathBooleanFunction },
+ { "ceiling", xmlXPathCeilingFunction },
+ { "count", xmlXPathCountFunction },
+ { "concat", xmlXPathConcatFunction },
+ { "contains", xmlXPathContainsFunction },
+ { "id", xmlXPathIdFunction },
+ { "false", xmlXPathFalseFunction },
+ { "floor", xmlXPathFloorFunction },
+ { "last", xmlXPathLastFunction },
+ { "lang", xmlXPathLangFunction },
+ { "local-name", xmlXPathLocalNameFunction },
+ { "not", xmlXPathNotFunction },
+ { "name", xmlXPathNameFunction },
+ { "namespace-uri", xmlXPathNamespaceURIFunction },
+ { "normalize-space", xmlXPathNormalizeFunction },
+ { "number", xmlXPathNumberFunction },
+ { "position", xmlXPathPositionFunction },
+ { "round", xmlXPathRoundFunction },
+ { "string", xmlXPathStringFunction },
+ { "string-length", xmlXPathStringLengthFunction },
+ { "starts-with", xmlXPathStartsWithFunction },
+ { "substring", xmlXPathSubstringFunction },
+ { "substring-before", xmlXPathSubstringBeforeFunction },
+ { "substring-after", xmlXPathSubstringAfterFunction },
+ { "sum", xmlXPathSumFunction },
+ { "true", xmlXPathTrueFunction },
+ { "translate", xmlXPathTranslateFunction }
+};
+
+#define NUM_STANDARD_FUNCTIONS \
+ (sizeof(xmlXPathStandardFunctions) / sizeof(xmlXPathStandardFunctions[0]))
+
+#define SF_HASH_SIZE 64
+
+static unsigned char xmlXPathSFHash[SF_HASH_SIZE];

double xmlXPathNAN = 0.0;
double xmlXPathPINF = 0.0;
@@ -156,6 +193,18 @@ xmlXPathInit(void) {
xmlInitParser();
}

+ATTRIBUTE_NO_SANITIZE_INTEGER
+static unsigned
+xmlXPathSFComputeHash(const xmlChar *name) {
+ unsigned hashValue = 5381;
+ const xmlChar *ptr;
+
+ for (ptr = name; *ptr; ptr++)
+ hashValue = hashValue * 33 + *ptr;
+
+ return(hashValue);
+}
+
/**
* xmlInitXPathInternal:
*
@@ -164,6 +213,8 @@ xmlXPathInit(void) {
ATTRIBUTE_NO_SANITIZE("float-divide-by-zero")
void
xmlInitXPathInternal(void) {
+ size_t i;
+
#if defined(NAN) && defined(INFINITY)
xmlXPathNAN = NAN;
xmlXPathPINF = INFINITY;
@@ -175,8 +226,34 @@ xmlInitXPathInternal(void) {
xmlXPathPINF = 1.0 / zero;
xmlXPathNINF = -xmlXPathPINF;
#endif
+
+ /*
+ * Initialize hash table for standard functions
+ */
+
+ for (i = 0; i < SF_HASH_SIZE; i++)
+ xmlXPathSFHash[i] = UCHAR_MAX;
+
+ for (i = 0; i < NUM_STANDARD_FUNCTIONS; i++) {
+ const char *name = xmlXPathStandardFunctions[i].name;
+ int bucketIndex = xmlXPathSFComputeHash(BAD_CAST name) % SF_HASH_SIZE;
+
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) {
+ bucketIndex += 1;
+ if (bucketIndex >= SF_HASH_SIZE)
+ bucketIndex = 0;
+ }
+
+ xmlXPathSFHash[bucketIndex] = i;
+ }
}

+/************************************************************************
+ * *
+ * Floating point stuff *
+ * *
+ ************************************************************************/
+
/**
* xmlXPathIsNaN:
* @val: a double value
@@ -3979,18 +4056,6 @@ xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt,
*/
xmlXPathFunction
xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) {
- if (ctxt == NULL)
- return (NULL);
-
- if (ctxt->funcLookupFunc != NULL) {
- xmlXPathFunction ret;
- xmlXPathFuncLookupFunc f;
-
- f = ctxt->funcLookupFunc;
- ret = f(ctxt->funcLookupData, name, NULL);
- if (ret != NULL)
- return(ret);
- }
return(xmlXPathFunctionLookupNS(ctxt, name, NULL));
}

@@ -4015,6 +4080,22 @@ xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name,
if (name == NULL)
return(NULL);

+ if (ns_uri == NULL) {
+ int bucketIndex = xmlXPathSFComputeHash(name) % SF_HASH_SIZE;
+
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) {
+ int funcIndex = xmlXPathSFHash[bucketIndex];
+
+ if (strcmp(xmlXPathStandardFunctions[funcIndex].name,
+ (char *) name) == 0)
+ return(xmlXPathStandardFunctions[funcIndex].func);
+
+ bucketIndex += 1;
+ if (bucketIndex >= SF_HASH_SIZE)
+ bucketIndex = 0;
+ }
+ }
+
if (ctxt->funcLookupFunc != NULL) {
xmlXPathFuncLookupFunc f;

@@ -13494,61 +13575,6 @@ xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) {
void
xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt)
{
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean",
- xmlXPathBooleanFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling",
- xmlXPathCeilingFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count",
- xmlXPathCountFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat",
- xmlXPathConcatFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains",
- xmlXPathContainsFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id",
- xmlXPathIdFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false",
- xmlXPathFalseFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor",
- xmlXPathFloorFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last",
- xmlXPathLastFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang",
- xmlXPathLangFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name",
- xmlXPathLocalNameFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not",
- xmlXPathNotFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name",
- xmlXPathNameFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri",
- xmlXPathNamespaceURIFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space",
- xmlXPathNormalizeFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number",
- xmlXPathNumberFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position",
- xmlXPathPositionFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round",
- xmlXPathRoundFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string",
- xmlXPathStringFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length",
- xmlXPathStringLengthFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with",
- xmlXPathStartsWithFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring",
- xmlXPathSubstringFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before",
- xmlXPathSubstringBeforeFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after",
- xmlXPathSubstringAfterFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum",
- xmlXPathSumFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true",
- xmlXPathTrueFunction);
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate",
- xmlXPathTranslateFunction);
-
xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri",
(const xmlChar *)"http://www.w3.org/2002/08/xquery-functions",
xmlXPathEscapeUriFunction);
--
2.47.1

0 comments on commit 3f20dd9

Please sign in to comment.