Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update with latest Jsoup (4690661) #47

Merged
merged 10 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ksoup-network/module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ apply: [ ../common.module-template.yaml ]
aliases:
- jvmAndAndroid: [ jvm, android ]
- concurrent: [ jvm, android, linuxX64, linuxArm64, tvosArm64, tvosX64, tvosSimulatorArm64, macosX64, macosArm64, iosArm64, iosSimulatorArm64, iosX64, mingwX64 ]
- jsAndWasm: [js, wasm]

repositories:
- mavenLocal
Expand All @@ -23,7 +24,7 @@ dependencies@jvmAndAndroid:
dependencies@apple:
- $libs.ktor.client.darwin

dependencies@js:
dependencies@jsAndWasm:
- $libs.ktor.client.js

dependencies@mingw:
Expand Down
1 change: 1 addition & 0 deletions ksoup-test/module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ repositories:
test-dependencies:
- ../ksoup
- $libs.korlibs.io
- $libs.kotlinx.io
- $libs.codepoints
- $libs.kotlinx.coroutines.test
- $libs.kotlinx.datetime
Expand Down
5 changes: 2 additions & 3 deletions ksoup-test/test/com/fleeksoft/ksoup/PlatformTest.kt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.fleeksoft.ksoup

import com.fleeksoft.ksoup.ported.jsSupportedRegex
import kotlin.test.BeforeTest
import kotlin.test.Test
import kotlin.test.assertEquals

Expand All @@ -10,9 +9,9 @@ class PlatformTest {
@Test
fun testJsSupportedRegex() {
val regex2 = jsSupportedRegex("img[src~=(?i)\\.(png|jpe?g)]")
val expected2 = if (Platform.isJS()) """img[src~=\.(png|jpe?g)]""" else """img[src~=(?i)\.(png|jpe?g)]"""
val expected2 = if (Platform.isJsOrWasm()) """img[src~=\.(png|jpe?g)]""" else """img[src~=(?i)\.(png|jpe?g)]"""
assertEquals(expected2, regex2.pattern)
if (Platform.isJS()) {
if (Platform.isJsOrWasm()) {
assertEquals(RegexOption.IGNORE_CASE, regex2.options.first())
}
}
Expand Down
68 changes: 37 additions & 31 deletions ksoup-test/test/com/fleeksoft/ksoup/TestHelper.kt
Original file line number Diff line number Diff line change
@@ -1,65 +1,71 @@
package com.fleeksoft.ksoup

import com.fleeksoft.ksoup.io.FileSource
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.ported.openSourceReader
import korlibs.io.compression.deflate.GZIP
import korlibs.io.compression.uncompress
import korlibs.io.file.VfsFile
import korlibs.io.file.fullName
import korlibs.io.file.readAsSyncStream
import korlibs.io.file.std.uniVfs
import korlibs.io.stream.readAll
import kotlinx.io.buffered
import kotlinx.io.files.Path
import kotlinx.io.files.SystemFileSystem
import kotlinx.io.readByteArray

object TestHelper {

suspend fun readGzipResource(file: String): SourceReader {
return readGzipFile(getResourceAbsolutePath(file).uniVfs)
suspend fun readGzipResource(resource: String): SourceReader {
return readGzipFile(resource)
}

suspend fun readResource(file: String): SourceReader {
if (file.endsWith(".gz") || file.endsWith(".z")) {
return readGzipResource(file)
suspend fun readResource(resource: String): SourceReader {
if (resource.endsWith(".gz") || resource.endsWith(".z")) {
return readGzipResource(resource)
}
return readFile(getResourceAbsolutePath(file).uniVfs)
return readFile(resource)
}

fun getResourceAbsolutePath(resourceName: String): String {
if (Platform.isWindows()) {
fun getResourceAbsolutePath(resourceName: String, absForWindows: Boolean = true): String {
if (Platform.isWindows() && !BuildConfig.isKotlinx && absForWindows) {
return "../../../../testResources/$resourceName"
} else if (Platform.isJS()) {
} else if (Platform.isJsOrWasm()) {
return "https://raw.githubusercontent.com/fleeksoft/ksoup/release/ksoup-test/testResources/$resourceName"
}
return "${BuildConfig.PROJECT_ROOT}/ksoup-test/testResources/$resourceName"
}

suspend fun getFileAsString(file: VfsFile): String {
val bytes: ByteArray = if (file.fullName.endsWith(".gz")) {
readGzipFile(file).readAllBytes()
suspend fun readResourceAsString(resourceName: String): String {
val bytes: ByteArray = if (resourceName.endsWith(".gz")) {
readGzipFile(resourceName).readAllBytes()
} else {
readFile(file).readAllBytes()
readFile(resourceName).readAllBytes()
}
return bytes.decodeToString()
}

suspend fun resourceFilePathToStream(path: String): SourceReader {
val file = this.getResourceAbsolutePath(path).uniVfs
return pathToStream(file)
}

suspend fun pathToStream(file: VfsFile): SourceReader {
return if (file.fullName.endsWith(".gz") || file.fullName.endsWith(".z")) {
readGzipFile(file)
suspend fun resourceFilePathToStream(resource: String): SourceReader {
return if (resource.endsWith(".gz") || resource.endsWith(".z")) {
readGzipFile(resource)
} else {
readFile(file)
readFile(resource)
}
}

suspend fun readFile(file: VfsFile): SourceReader {
return file.readAll().openSourceReader()
private suspend fun readFile(resource: String): SourceReader {
val abs = getResourceAbsolutePath(resource, absForWindows = false)
val bytes = if (Platform.isJsOrWasm()) {
abs.uniVfs.readAll()
} else {
SystemFileSystem.source(Path(abs)).buffered().readByteArray()
}
return bytes.openSourceReader()
}

suspend fun readGzipFile(file: VfsFile): SourceReader {
return file.readAsSyncStream().readAll().uncompress(GZIP).openSourceReader()
private suspend fun readGzipFile(resource: String): SourceReader {
val abs = getResourceAbsolutePath(resource, absForWindows = false)
val bytes = if (Platform.isJsOrWasm()) {
abs.uniVfs.readAll()
} else {
SystemFileSystem.source(Path(abs)).buffered().readByteArray()
}
return bytes.uncompress(GZIP).openSourceReader()
}
}
66 changes: 31 additions & 35 deletions ksoup-test/test/com/fleeksoft/ksoup/helper/DataUtilTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import com.fleeksoft.ksoup.ported.io.Charsets
import com.fleeksoft.ksoup.ported.openSourceReader
import com.fleeksoft.ksoup.ported.toByteArray
import com.fleeksoft.ksoup.ported.toSourceFile
import korlibs.io.file.std.uniVfs
import kotlinx.coroutines.test.runTest
import kotlin.test.*

Expand Down Expand Up @@ -129,7 +128,7 @@ class DataUtilTest {

@Test
fun secondMetaElementWithContentTypeContainsCharsetParameter() {
if (Platform.isJS() || Platform.isApple() || Platform.isWindows()) {
if (Platform.isJsOrWasm() || Platform.isApple() || Platform.isWindows()) {
// FIXME: euc-kr charset not supported
return
}
Expand Down Expand Up @@ -167,38 +166,38 @@ class DataUtilTest {

@Test
fun supportsBOMinFiles() = runTest {
if (BuildConfig.isKotlinx && Platform.isJS()) {
if (BuildConfig.isKotlinx && Platform.isJsOrWasm()) {
// FIXME: UTF-16 charset not supported
return@runTest
}
var input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf16be.html")
var doc: Document =
Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
assertTrue(doc.title().contains("UTF-16BE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-16BE")
assertContains(doc.text(), "가각갂갃간갅")
input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf16le.html")
doc = Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
assertTrue(doc.title().contains("UTF-16LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-16LE")
assertContains(doc.text(), "가각갂갃간갅")

if (Platform.isJS() || Platform.isWindows() || Platform.isLinux()) {
if (Platform.isJsOrWasm() || Platform.isWindows() || Platform.isLinux()) {
// FIXME: UTF-32 charset not supported
return@runTest
}

input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf32be.html")
doc = Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
assertTrue(doc.title().contains("UTF-32BE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-32BE")
assertContains(doc.text(), "가각갂갃간갅")
input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf32le.html")
doc = Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
assertTrue(doc.title().contains("UTF-32LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-32LE")
assertContains(doc.text(), "가각갂갃간갅")
}

@Test
fun streamerSupportsBOMinFiles() = runTest {
if (BuildConfig.isKotlinx && Platform.isJS()) {
if (BuildConfig.isKotlinx && Platform.isJsOrWasm()) {
// FIXME: UTF-16 charset not supported
return@runTest
}
Expand All @@ -208,31 +207,31 @@ class DataUtilTest {

var doc: Document = DataUtil.streamParser(sourceReader = source, baseUri = "http://example.com", charset = null, parser = parser)
.complete()
assertTrue(doc.title().contains("UTF-16BE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-16BE")
assertContains(doc.text(), "가각갂갃간갅")

source = TestHelper.readResource("bomtests/bom_utf16le.html")
doc = DataUtil.streamParser(sourceReader = source, baseUri = "http://example.com", charset = null, parser = parser)
.complete()
assertTrue(doc.title().contains("UTF-16LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-16LE")
assertContains(doc.text(), "가각갂갃간갅")

if (Platform.isJS() || Platform.isWindows() || Platform.isLinux()) {
if (Platform.isJsOrWasm() || Platform.isWindows() || Platform.isLinux()) {
// FIXME: UTF-32 charset not supported
return@runTest
}

source = TestHelper.readResource("bomtests/bom_utf32be.html")
doc = DataUtil.streamParser(sourceReader = source, baseUri = "http://example.com", charset = null, parser = parser)
.complete()
assertTrue(doc.title().contains("UTF-32BE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-32BE")
assertContains(doc.text(), "가각갂갃간갅")

source = TestHelper.readResource("bomtests/bom_utf32le.html")
doc = DataUtil.streamParser(sourceReader = source, baseUri = "http://example.com", charset = null, parser = parser)
.complete()
assertTrue(doc.title().contains("UTF-32LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))
assertContains(doc.title(), "UTF-32LE")
assertContains(doc.text(), "가각갂갃간갅")
}

@Test
Expand Down Expand Up @@ -286,13 +285,11 @@ class DataUtilTest {
@Test
fun supportsXmlCharsetDeclaration() {
val encoding = "iso-8859-1"
val soup =
(
"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" +
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" +
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\">Hellö Wörld!</html>"
)
.toByteArray(Charsets.forName(encoding)).openSourceReader()
val soup = (
"<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" +
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" +
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\">Hellö Wörld!</html>"
).toByteArray(Charsets.forName(encoding)).openSourceReader()
val doc: Document = Ksoup.parse(soup, baseUri = "", charsetName = null)
assertEquals("Hellö Wörld!", doc.body().text())
}
Expand Down Expand Up @@ -341,9 +338,10 @@ class DataUtilTest {
// kotlinx module does not support gzip
return@runTest
}
val resourceFile = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val resourceName = "htmltests/large.html.gz"
val resourceFile = TestHelper.getResourceAbsolutePath(resourceName)
val inputFile = resourceFile.toSourceFile()
val input: String = TestHelper.getFileAsString(resourceFile.uniVfs)
val input: String = TestHelper.readResourceAsString(resourceName)

val expected = Ksoup.parse(input, "https://example.com")
val doc: Document = Ksoup.parseFile(inputFile, baseUri = "https://example.com", charsetName = null)
Expand All @@ -354,8 +352,7 @@ class DataUtilTest {

@Test
fun testStringVsSourceReaderParse() = runTest {
val resourceFile = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(resourceFile.uniVfs)
val input: String = TestHelper.readResourceAsString("htmltests/large.html.gz")

val expected = Ksoup.parse(input, "https://example.com")
val doc: Document = Ksoup.parse(sourceReader = input.openSourceReader(), baseUri = "https://example.com", charsetName = null)
Expand All @@ -365,8 +362,7 @@ class DataUtilTest {

@Test
fun handlesUnlimitedRead() = runTest {
val inputFile: String = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(inputFile.uniVfs)
val input: String = TestHelper.readResourceAsString("htmltests/large.html.gz")
val byteBuffer: ByteArray = DataUtil.readToByteBuffer(input.openSourceReader(), 0)
val read = byteBuffer.decodeToString()
assertEquals(input, read)
Expand Down
11 changes: 4 additions & 7 deletions ksoup-test/test/com/fleeksoft/ksoup/integration/ParseTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import com.fleeksoft.ksoup.Ksoup.parseFile
import com.fleeksoft.ksoup.nodes.Document
import com.fleeksoft.ksoup.parser.Parser
import com.fleeksoft.ksoup.ported.openSourceReader
import korlibs.io.file.std.uniVfs
import kotlinx.coroutines.test.runTest
import kotlin.test.*

Expand All @@ -19,7 +18,7 @@ class ParseTest {

@Test
fun testHtml5Charset() = runTest {
if (Platform.isApple() || Platform.isWindows() || (BuildConfig.isKotlinx && Platform.isJS())) {
if (Platform.isApple() || Platform.isWindows() || (BuildConfig.isKotlinx && Platform.isJsOrWasm())) {
// don't support gb2312 or gbk
return@runTest
}
Expand Down Expand Up @@ -75,7 +74,7 @@ class ParseTest {
@Test
fun testLowercaseUtf8Charset() = runTest {
val resourceName = "htmltests/lowercase-charset-test.html"
val doc: Document = if (BuildConfig.isKotlinx && Platform.isJS()) {
val doc: Document = if (BuildConfig.isKotlinx && Platform.isJsOrWasm()) {
val source = TestHelper.readResource(resourceName)
Ksoup.parse(sourceReader = source, baseUri = resourceName)
} else {
Expand Down Expand Up @@ -130,8 +129,7 @@ class ParseTest {

@Test
fun testWikiExpandedFromString() = runTest {
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-edit.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val html = TestHelper.readResourceAsString("htmltests/xwiki-edit.html.gz")
val doc = parse(html)
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
val wantHtml =
Expand All @@ -141,8 +139,7 @@ class ParseTest {

@Test
fun testWikiFromString() = runTest {
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val html = TestHelper.readResourceAsString("htmltests/xwiki-1324.html.gz")
val doc = parse(html)
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
val wantHtml =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class GithubIssue19 {

@Test
fun testAttributeIncorrectMixCharsetIssue() = runTest {
if (Platform.isJS()) {
if (Platform.isJsOrWasm()) {
// timeout issue
return@runTest
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package com.fleeksoft.ksoup.issues

import com.fleeksoft.ksoup.Ksoup
import com.fleeksoft.ksoup.TestHelper
import korlibs.io.file.std.uniVfs
import kotlinx.coroutines.test.runTest
import kotlin.test.Test
import kotlin.test.assertEquals
Expand All @@ -11,7 +10,7 @@ class GithubIssuesTests {
@Test
fun testIssue20DuplicateElements() = runTest {
// https://github.com/fleeksoft/ksoup/issues/20
Ksoup.parse(TestHelper.getFileAsString(TestHelper.getResourceAbsolutePath("htmltests/issue20.html.gz").uniVfs))
Ksoup.parse(TestHelper.readResourceAsString("htmltests/issue20.html.gz"))
// Ksoup.parseGetRequest("https://www.dm530w.org/")
.apply {
body().select("div[class=firs l]")
Expand Down
4 changes: 2 additions & 2 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class DocumentTest {
fun testLocation() = runTest {
// tests location vs base href
val resourceName = "htmltests/basehref.html"
val doc: Document = if (BuildConfig.isKotlinx && Platform.isJS()) {
val doc: Document = if (BuildConfig.isKotlinx && Platform.isJsOrWasm()) {
val source = TestHelper.readResource(resourceName)
Ksoup.parse(sourceReader = source, baseUri = "http://example.com/", charsetName = "UTF-8")
} else {
Expand Down Expand Up @@ -468,7 +468,7 @@ class DocumentTest {

@Test
fun testShiftJisRoundtrip() {
if (Platform.isJS()) {
if (Platform.isJsOrWasm()) {
// Shift_JIS not supported
return
}
Expand Down
Loading
Loading