From 66195e2fddd840bd90cbb16707db33021af81e60 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 30 Nov 2023 22:24:16 +0800 Subject: [PATCH] New `xml_find_int()` to get integers from an XPath (#412) --- NAMESPACE | 4 ++++ NEWS.md | 3 +++ R/xml_find.R | 27 +++++++++++++++++++++++++++ man/xml_find_all.Rd | 3 +++ tests/testthat/_snaps/xml_find.md | 18 ++++++++++++++++++ tests/testthat/test-xml_find.R | 18 ++++++++++++++++++ 6 files changed, 73 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 6e9562fa..8f74da43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,9 @@ S3method(xml_find_chr,xml_nodeset) S3method(xml_find_first,xml_missing) S3method(xml_find_first,xml_node) S3method(xml_find_first,xml_nodeset) +S3method(xml_find_int,xml_missing) +S3method(xml_find_int,xml_node) +S3method(xml_find_int,xml_nodeset) S3method(xml_find_lgl,xml_missing) S3method(xml_find_lgl,xml_node) S3method(xml_find_lgl,xml_nodeset) @@ -149,6 +152,7 @@ export(xml_dtd) export(xml_find_all) export(xml_find_chr) export(xml_find_first) +export(xml_find_int) export(xml_find_lgl) export(xml_find_num) export(xml_find_one) diff --git a/NEWS.md b/NEWS.md index b119b53d..f8e422af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # xml2 (development version) +* `xml_find_int()` analogous to `xml_find_num()` for returning integers + matched by an XPath (#365, @michaelchirico). + * Now compatible with limxml2 2.12.0 and later (@KNnut). * Fix format string issues detected in R-devel. diff --git a/R/xml_find.R b/R/xml_find.R index 4dac21cb..12194b47 100644 --- a/R/xml_find.R +++ b/R/xml_find.R @@ -178,6 +178,33 @@ xml_find_num.xml_missing <- function(x, xpath, ns = xml_ns(x)) { numeric(0) } +#' @export +#' @rdname xml_find_all +xml_find_int <- function(x, xpath, ns = xml_ns(x)) { + UseMethod("xml_find_int") +} + +#' @export +xml_find_int.xml_node <- function(x, xpath, ns = xml_ns(x)) { + res <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf) + check_number_whole(res, arg = I(paste0("Element at path `", xpath, "`"))) + as.integer(res) +} + +#' @export +xml_find_int.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) { + if (length(x) == 0) { + return(integer()) + } + + vapply(x, function(x) xml_find_int(x, xpath = xpath, ns = ns), integer(1)) +} + +#' @export +xml_find_int.xml_missing <- function(x, xpath, ns = xml_ns(x)) { + integer(0) +} + #' @export #' @rdname xml_find_all xml_find_chr <- function(x, xpath, ns = xml_ns(x)) { diff --git a/man/xml_find_all.Rd b/man/xml_find_all.Rd index e013f940..c3d9dacb 100644 --- a/man/xml_find_all.Rd +++ b/man/xml_find_all.Rd @@ -5,6 +5,7 @@ \alias{xml_find_all.xml_nodeset} \alias{xml_find_first} \alias{xml_find_num} +\alias{xml_find_int} \alias{xml_find_chr} \alias{xml_find_lgl} \alias{xml_find_one} @@ -18,6 +19,8 @@ xml_find_first(x, xpath, ns = xml_ns(x)) xml_find_num(x, xpath, ns = xml_ns(x)) +xml_find_int(x, xpath, ns = xml_ns(x)) + xml_find_chr(x, xpath, ns = xml_ns(x)) xml_find_lgl(x, xpath, ns = xml_ns(x)) diff --git a/tests/testthat/_snaps/xml_find.md b/tests/testthat/_snaps/xml_find.md index 65f85b6e..7d1f0ebe 100644 --- a/tests/testthat/_snaps/xml_find.md +++ b/tests/testthat/_snaps/xml_find.md @@ -2,6 +2,24 @@ Element at path `//z` must be a number, not a object. +# xml_find_int errors with non integer results + + Code + xml_find_int(x, "//z") + Condition + Error in `xml_find_int()`: + ! Element at path `//z` must be a whole number, not a object. + Code + xml_find_int(x, "//y") + Condition + Error in `xml_find_int()`: + ! Element at path `//y` must be a whole number, not a list. + Code + xml_find_int(x, "number(1.1)") + Condition + Error in `xml_find_int()`: + ! Element at path `number(1.1)` must be a whole number, not the number 1.1. + # xml_find_chr errors with non character results Element at path `//z` must be a single string, not a object. diff --git a/tests/testthat/test-xml_find.R b/tests/testthat/test-xml_find.R index 056b4541..2311d98b 100644 --- a/tests/testthat/test-xml_find.R +++ b/tests/testthat/test-xml_find.R @@ -85,6 +85,24 @@ test_that("xml_find_num returns a numeric result", { expect_equal(xml_find_num(x, "1 div floor(-0)"), -Inf) }) +# Find int --------------------------------------------------------------------- +test_that("xml_find_int errors with non integer results", { + x <- read_xml("") + expect_snapshot(error = TRUE, { + xml_find_int(x, "//z") + xml_find_int(x, "//y") + xml_find_int(x, "number(1.1)") + }) +}) + +test_that("xml_find_int returns a integer result", { + x <- read_xml("1") + + expect_identical(xml_find_int(x, "1 div floor(-0.1)"), -1L) + expect_identical(xml_find_int(x, "number(//y)"), 1L) + expect_identical(xml_find_int(x, "string-length(string('abcd'))"), 4L) +}) + # Find chr --------------------------------------------------------------------- test_that("xml_find_chr errors with non character results", { x <- read_xml("")