From 856f4f0e7337fd3c763752c85acbc30333ad7fa9 Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Thu, 29 Dec 2022 12:26:54 +0300 Subject: [PATCH 1/6] LinkChecker: Make a "tool" which useful for checking links in directory for accessibility --- .../name/valery1707/problem/LinkChecker.kt | 148 ++++++++++++++ .../valery1707/problem/LinkCheckerTest.kt | 191 ++++++++++++++++++ src/test/resources/linkChecker/Demo.md | 5 + 3 files changed, 344 insertions(+) create mode 100644 src/main/kotlin/name/valery1707/problem/LinkChecker.kt create mode 100644 src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt create mode 100644 src/test/resources/linkChecker/Demo.md diff --git a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt new file mode 100644 index 0000000..f69ce56 --- /dev/null +++ b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt @@ -0,0 +1,148 @@ +package name.valery1707.problem + +import java.net.URI +import java.net.http.HttpClient +import java.net.http.HttpRequest +import java.net.http.HttpResponse +import java.nio.file.Path +import java.time.Duration +import java.time.Instant +import java.time.temporal.ChronoField.NANO_OF_SECOND +import kotlin.io.path.ExperimentalPathApi +import kotlin.io.path.PathWalkOption +import kotlin.io.path.readText +import kotlin.io.path.walk + +/** + * todo Add description + * todo Make async (probably with coroutines) + */ +class LinkChecker(private val root: Path) { + /** + * Сканируем все файлы из директории, ищем в тексте ссылки, проверяем их на доступность + */ + @OptIn(ExperimentalPathApi::class) + fun findInvalid(client: HttpClient): Map { + val filePos2uriCheck = root + .walk(PathWalkOption.FOLLOW_LINKS) + .map { root.relativize(it) } + .map { + it to loadFile(root.resolve(it)) + } + .flatMap { pathWithText -> + pathWithText.second.findUri() + .map { (pathWithText.first to it.first) to it.second } + } + .take(20)// todo Remove limit + .map { + it.first to (it.second to it.second.check(client)) + } + .filter { it.second.second.first != 200 } + .toList() + // todo remove + println("filePos2uriCheck = $filePos2uriCheck") + return filePos2uriCheck + .associateBy( + { "${it.first.first}:${it.first.second}" }, + { + when (it.second.second.first) { + in HTTP_REDIRECT -> "${it.second.first} -> ${it.second.second.first} -> ${it.second.second.second}" + -1 -> "${it.second.first} -> ${it.second.second.first} -> ${it.second.second.second.query}" + else -> "${it.second.first} -> ${it.second.second.first}" + } + }, + ) + } + + private fun loadFile(path: Path): String { + return path.readText() + } + + companion object { + /** + * https://stackoverflow.com/a/45690571 + */ + private val URI_PATTERN_FULL = ("" + + "(?[a-z][a-z0-9+.-]+):" + + "(?\\/\\/(?[^@]+@)?(?[a-z0-9.\\-_~]+)(?:\\d+)?)?" + + "(?(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])+(?:\\/(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])*)*|(?:\\/(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])+)*)?" + + "(?\\?(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@]|[/?])+)?" + + "(?\\#(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@]|[/?])+)?" + + "").toRegex(RegexOption.IGNORE_CASE) + + private val URI_PATTERN_SIMPLE = URI_PATTERN_FULL.pattern + .replace("()", "") + .replace("?:", "") + .replace("+)*)?(?", "*)*)?(?") + .replace("(?[^@]+@)", "(?[\\w]+@)") + .toRegex(RegexOption.IGNORE_CASE) + + private fun MatchResult.position(text: String): String { + val prefix = text.subSequence(0, range.last) + val col = range.first - prefix.indexOfLast { it == '\n' } + val line = 1 + prefix.count { it == '\n' } + return "$line:$col" + } + + private fun String.findUri() = URI_PATTERN_SIMPLE + .findAll(this) + .filter { it.value.startsWith("http") } + .map { uri -> + (uri.position(this)) to (uri.value.trimEnd('.').toURI()) + } + .filter { it.second != null } + .map { it.first to it.second!! } + .filter { it.second.scheme in setOf("http", "https") } + + internal fun String.toURI(): URI? = try { + URI.create(this) + } catch (e: IllegalArgumentException) { + null + } + + private fun URI.check(client: HttpClient): Pair { + val request = HttpRequest.newBuilder(this).GET().build() + // todo Cache + return try { + // todo Logging + println("Check: $this") + val response = client.send(request, HttpResponse.BodyHandlers.discarding()) + when (response.statusCode()) { + //Redirects: extract new location + in HTTP_REDIRECT -> response.statusCode() to response.headers().firstValue("Location")!!.get().toURI()!! + + //Rate limiting: wait and retry + in HTTP_RATE_LIMIT -> { + val now = Instant.now() + val await = response.headers() + + // todo Extract to method + // https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#checking-your-rate-limit-status + .map()["x-ratelimit-reset"] + ?.asSequence() + ?.map(String::toLong)?.map(Instant::ofEpochSecond) + ?.map { Duration.between(now.with(NANO_OF_SECOND, 0), it) } + ?.map(Duration::toMillis) + ?.filter { it >= 0 } + ?.firstOrNull() + + ?: 500 + + // todo Logging + println("Await: $await ms") + Thread.sleep(await) + check(client) + } + + else -> response.statusCode() to response.uri() + } + } catch (e: Exception) { + // todo Logging + -1 to URI.create("http://host?message=${e.message?.replace(" ", "%20")}") + } + } + + private val HTTP_REDIRECT = setOf(301, 302, 307, 308) + private val HTTP_RATE_LIMIT = setOf(403) + } +} diff --git a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt new file mode 100644 index 0000000..08bb79d --- /dev/null +++ b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt @@ -0,0 +1,191 @@ +package name.valery1707.problem + +import name.valery1707.problem.LinkChecker.Companion.toURI +import org.assertj.core.api.Assertions.assertThat +import org.assertj.core.api.Assertions.fail +import org.assertj.core.api.Assumptions.assumeThat +import org.junit.jupiter.api.Test +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ValueSource +import java.net.Authenticator +import java.net.CookieHandler +import java.net.InetSocketAddress +import java.net.ProxySelector +import java.net.URI +import java.net.http.HttpClient +import java.net.http.HttpHeaders +import java.net.http.HttpRequest +import java.net.http.HttpResponse +import java.nio.file.Path +import java.time.Duration +import java.time.Instant +import java.util.* +import java.util.concurrent.CompletableFuture +import java.util.concurrent.Executor +import javax.net.ssl.SSLContext +import javax.net.ssl.SSLParameters +import javax.net.ssl.SSLSession +import kotlin.io.path.toPath + +typealias ResponseBuilder = (HttpRequest) -> HttpResponse +typealias ResponseMeta = Pair> + +internal class LinkCheckerTest { + + @ParameterizedTest + @ValueSource( + strings = [ + "./path/to/real/project", + ], + ) + internal fun checkReal(path: Path) { + assumeThat(path).isDirectory.isReadable + val client = HttpClient + .newBuilder() + .followRedirects(HttpClient.Redirect.NEVER) + .proxy(proxy) + .build() + val checker = LinkChecker(path) + assertThat(checker.findInvalid(client)).isEmpty() + } + + @Test + @Suppress("HttpUrlsUsage") + internal fun testDemo() { + val path = javaClass.getResource("/linkChecker/Demo.md")?.toURI()?.toPath()?.parent + assertThat(path).isNotNull.isDirectory.isReadable + + fun ok(): ResponseMeta = 200 to mapOf() + fun notFound(): ResponseMeta = 404 to mapOf() + fun redirect(code: Int, target: String): ResponseMeta = code to mapOf("Location" to target) + fun rateLimitGH(awaitMillis: Long): ResponseMeta = 403 to mapOf("x-ratelimit-reset" to Instant.now().plusMillis(awaitMillis).epochSecond.toString()) + + //Check links via: curl --silent -X GET --head 'URL' + val client = MockedHttpClient.fromMeta( + mapOf( + "https://ya.ru" to listOf( + redirect(302, "https://ya.ru/"), + ), + "https://ya.ru/" to listOf( + ok(), + ), + "http://schema.org" to listOf( + redirect(301, "https://schema.org/"), + ), + "https://github.com/androidx/androidx/blob/androidx-main/build.gradle" to listOf( + //todo Calculate header value on building response + //Will wait some time + rateLimitGH(2111), + //Will wait zero time + rateLimitGH(10), + //Will wait default time + rateLimitGH(-1500), + ok(), + ), + "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt" to listOf( + notFound(), + ), + ), + ) + + val checker = LinkChecker(path!!) + + assertThat(checker.findInvalid(client)).containsExactlyInAnyOrderEntriesOf( + mapOf( + "Demo.md:1:25" to "https://ya.ru -> 302 -> https://ya.ru/", + "Demo.md:3:14" to "http://schema.org -> 301 -> https://schema.org/", + "Demo.md:5:14" to "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt -> 404", + ), + ) + } + + @ParameterizedTest + @ValueSource( + strings = [ + "some invalid uri", + ], + ) + internal fun testInvalidUriString(uriString: String) { + assertThat(uriString.toURI()).isNull() + } + + private val proxy: ProxySelector by lazy { + sequenceOf( + "genproxy" to 8080, + ) + .map { InetSocketAddress(it.first, it.second) } + .filterNot { it.isUnresolved } + .map { ProxySelector.of(it) } + .firstOrNull() + ?: ProxySelector.getDefault() + } + + private class MockedHttpClient( + private val worker: ResponseBuilder, + ) : HttpClient() { + override fun cookieHandler(): Optional = Optional.empty() + override fun connectTimeout(): Optional = Optional.empty() + override fun followRedirects(): Redirect = Redirect.NEVER + override fun proxy(): Optional = Optional.empty() + override fun sslContext(): SSLContext = SSLContext.getDefault() + override fun sslParameters(): SSLParameters = sslContext().defaultSSLParameters + override fun authenticator(): Optional = Optional.empty() + override fun version(): Version = Version.HTTP_1_1 + override fun executor(): Optional = Optional.empty() + + override fun sendAsync( + request: HttpRequest, + responseBodyHandler: HttpResponse.BodyHandler, + pushPromiseHandler: HttpResponse.PushPromiseHandler?, + ): CompletableFuture> = sendAsync(request, responseBodyHandler) + + override fun sendAsync( + request: HttpRequest, + responseBodyHandler: HttpResponse.BodyHandler, + ): CompletableFuture> = CompletableFuture.supplyAsync { send(request, responseBodyHandler) } + + @Suppress("UNCHECKED_CAST") + override fun send(request: HttpRequest, responseBodyHandler: HttpResponse.BodyHandler): HttpResponse = + worker(request) as HttpResponse + + companion object { + fun fromMeta(responses: Map>): HttpClient = fromBuilders( + responses.mapValues { + it.value + .map> { meta -> + { req -> + MockedHttpResponse.fromRequest(req, meta.first, meta.second.mapValues { h -> listOf(h.value) }) + } + } + .toMutableList() + }, + ) + + fun fromBuilders(responses: Map>>): HttpClient = MockedHttpClient { req -> + responses[req.uri().toString()]?.removeFirst()?.invoke(req) ?: fail("Unknown response builders for ${req.uri()}") + } + } + } + + private class MockedHttpResponse( + private val request: HttpRequest, + private val statusCode: Int, + private val headers: HttpHeaders, + ) : HttpResponse { + override fun statusCode(): Int = statusCode + override fun request(): HttpRequest = request + override fun previousResponse(): Optional> = Optional.empty() + override fun headers(): HttpHeaders = headers + override fun body(): T? = null + override fun sslSession(): Optional = Optional.empty() + override fun uri(): URI = request().uri() + override fun version(): HttpClient.Version = request().version().orElse(HttpClient.Version.HTTP_1_1) + + companion object { + fun fromRequest(request: HttpRequest, statusCode: Int, headers: Map>): HttpResponse = MockedHttpResponse( + request, statusCode, HttpHeaders.of(headers) { _, _ -> true }, + ) + } + } + +} diff --git a/src/test/resources/linkChecker/Demo.md b/src/test/resources/linkChecker/Demo.md new file mode 100644 index 0000000..bb911e8 --- /dev/null +++ b/src/test/resources/linkChecker/Demo.md @@ -0,0 +1,5 @@ +Link with name: [named](https://ya.ru). +Link with name: [named](https://ya.ru/). +Link inlined http://schema.org. +Link with rate limiting: https://github.com/androidx/androidx/blob/androidx-main/build.gradle +Link absent: https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt From bb6ffb74a2b20635f6541f24fda71a326f56968c Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Thu, 29 Dec 2022 12:34:53 +0300 Subject: [PATCH 2/6] LinkChecker: Remove unnecessary parentheses --- src/main/kotlin/name/valery1707/problem/LinkChecker.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt index f69ce56..e4848ee 100644 --- a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt +++ b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt @@ -31,7 +31,7 @@ class LinkChecker(private val root: Path) { } .flatMap { pathWithText -> pathWithText.second.findUri() - .map { (pathWithText.first to it.first) to it.second } + .map { pathWithText.first to it.first to it.second } } .take(20)// todo Remove limit .map { @@ -88,7 +88,7 @@ class LinkChecker(private val root: Path) { .findAll(this) .filter { it.value.startsWith("http") } .map { uri -> - (uri.position(this)) to (uri.value.trimEnd('.').toURI()) + uri.position(this) to uri.value.trimEnd('.').toURI() } .filter { it.second != null } .map { it.first to it.second!! } From a23fad692da99d9a79d08f5c1213a1081841a5be Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Thu, 29 Dec 2022 13:59:09 +0300 Subject: [PATCH 3/6] LinkChecker: Use logging instead of direct printing into console --- build.gradle.kts | 3 +++ gradle/libs.versions.toml | 4 ++++ .../kotlin/name/valery1707/problem/LinkChecker.kt | 12 ++++++------ versions.lock | 8 ++++++-- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 1d3367c..01656af 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -27,6 +27,9 @@ repositories { dependencies { implementation(libs.jackson.databind) implementation(libs.mockneat) + implementation(libs.bundles.logger.api) + + runtimeOnly(libs.logger.impl) testImplementation(kotlin("test")) testImplementation(platform(libs.junit)) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7a3367c..e58ff47 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -11,8 +11,12 @@ junit = { module = "org.junit:junit-bom", version.ref = "junit" } assertj = { module = "org.assertj:assertj-core", version = "3.23.1" } mockneat = { module = "net.andreinc:mockneat", version = "0.4.8" } jmh-ann = { module = "org.openjdk.jmh:jmh-generator-annprocess", version.ref = "jmh.tools" } +logger-api4j = { module = "org.slf4j:slf4j-api", version = "2.0.6" } +logger-api4k = { module = "io.github.microutils:kotlin-logging-jvm", version = "3.0.4" } +logger-impl = { module = "ch.qos.logback:logback-classic", version = "1.4.5" } [bundles] +logger-api = ["logger.api4j", "logger.api4k"] [plugins] jmh = { id = "me.champeau.jmh", version = "0.6.8" } diff --git a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt index e4848ee..6dc9f36 100644 --- a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt +++ b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt @@ -40,7 +40,7 @@ class LinkChecker(private val root: Path) { .filter { it.second.second.first != 200 } .toList() // todo remove - println("filePos2uriCheck = $filePos2uriCheck") + logger.debug { "filePos2uriCheck = $filePos2uriCheck" } return filePos2uriCheck .associateBy( { "${it.first.first}:${it.first.second}" }, @@ -59,6 +59,8 @@ class LinkChecker(private val root: Path) { } companion object { + private val logger = mu.KotlinLogging.logger {} + /** * https://stackoverflow.com/a/45690571 */ @@ -104,8 +106,7 @@ class LinkChecker(private val root: Path) { val request = HttpRequest.newBuilder(this).GET().build() // todo Cache return try { - // todo Logging - println("Check: $this") + logger.info("Check: $this") val response = client.send(request, HttpResponse.BodyHandlers.discarding()) when (response.statusCode()) { //Redirects: extract new location @@ -128,8 +129,7 @@ class LinkChecker(private val root: Path) { ?: 500 - // todo Logging - println("Await: $await ms") + logger.debug("Await: $await ms") Thread.sleep(await) check(client) } @@ -137,7 +137,7 @@ class LinkChecker(private val root: Path) { else -> response.statusCode() to response.uri() } } catch (e: Exception) { - // todo Logging + logger.error(e) { "Handle error on checking $this" } -1 to URI.create("http://host?message=${e.message?.replace(" ", "%20")}") } } diff --git a/versions.lock b/versions.lock index 5f1b93a..bca0543 100644 --- a/versions.lock +++ b/versions.lock @@ -1,4 +1,6 @@ # Run ./gradlew --write-locks to regenerate this file +ch.qos.logback:logback-classic:1.4.5 (1 constraints: 0c050136) +ch.qos.logback:logback-core:1.4.5 (1 constraints: 0b0d071d) com.fasterxml.jackson:jackson-bom:2.14.1 (3 constraints: 7f37c0d3) com.fasterxml.jackson.core:jackson-annotations:2.14.1 (2 constraints: 3f21f093) com.fasterxml.jackson.core:jackson-core:2.14.1 (2 constraints: 3f21f093) @@ -6,6 +8,7 @@ com.fasterxml.jackson.core:jackson-databind:2.14.1 (2 constraints: f1138582) com.github.mifmif:generex:1.0.2 (1 constraints: e00921ac) commons-codec:commons-codec:1.15 (1 constraints: b6094aa2) dk.brics.automaton:automaton:1.11-8 (1 constraints: 900be8e7) +io.github.microutils:kotlin-logging-jvm:3.0.4 (1 constraints: 0905fe35) net.andreinc:aleph:0.1.1 (1 constraints: df091eac) net.andreinc:markovneat:1.8 (1 constraints: 88099b98) net.andreinc:mockneat:0.4.8 (1 constraints: 0e05ff35) @@ -13,9 +16,10 @@ org.apache.commons:commons-lang3:3.12.0 (2 constraints: ab17f565) org.apache.commons:commons-text:1.9 (1 constraints: 89099c98) org.jetbrains:annotations:13.0 (1 constraints: df0e795c) org.jetbrains.kotlin:kotlin-stdlib:1.7.22 (3 constraints: 463049c4) -org.jetbrains.kotlin:kotlin-stdlib-common:1.7.22 (1 constraints: 450fd27a) +org.jetbrains.kotlin:kotlin-stdlib-common:1.7.22 (2 constraints: 71207b44) org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.7.22 (1 constraints: e310fbd2) -org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.7.22 (1 constraints: 3e05453b) +org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.7.22 (2 constraints: 6a16ed11) +org.slf4j:slf4j-api:2.0.6 (3 constraints: 0723153f) [Test dependencies] net.bytebuddy:byte-buddy:1.12.10 (1 constraints: 7b0bbcea) From 60833a6604227ffe4c571a1a94d8c2325c2dec2c Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Thu, 29 Dec 2022 14:19:09 +0300 Subject: [PATCH 4/6] LinkChecker: Generate response headers when processing a request in tests --- .../name/valery1707/problem/LinkCheckerTest.kt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt index 08bb79d..d7ee1f5 100644 --- a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt +++ b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt @@ -28,7 +28,7 @@ import javax.net.ssl.SSLSession import kotlin.io.path.toPath typealias ResponseBuilder = (HttpRequest) -> HttpResponse -typealias ResponseMeta = Pair> +typealias ResponseMeta = () -> Pair> internal class LinkCheckerTest { @@ -55,10 +55,10 @@ internal class LinkCheckerTest { val path = javaClass.getResource("/linkChecker/Demo.md")?.toURI()?.toPath()?.parent assertThat(path).isNotNull.isDirectory.isReadable - fun ok(): ResponseMeta = 200 to mapOf() - fun notFound(): ResponseMeta = 404 to mapOf() - fun redirect(code: Int, target: String): ResponseMeta = code to mapOf("Location" to target) - fun rateLimitGH(awaitMillis: Long): ResponseMeta = 403 to mapOf("x-ratelimit-reset" to Instant.now().plusMillis(awaitMillis).epochSecond.toString()) + fun ok(): ResponseMeta = { 200 to mapOf() } + fun notFound(): ResponseMeta = { 404 to mapOf() } + fun redirect(code: Int, target: String): ResponseMeta = { code to mapOf("Location" to target) } + fun rateLimitGH(awaitMillis: Long): ResponseMeta = { 403 to mapOf("x-ratelimit-reset" to Instant.now().plusMillis(awaitMillis).epochSecond.toString()) } //Check links via: curl --silent -X GET --head 'URL' val client = MockedHttpClient.fromMeta( @@ -73,7 +73,6 @@ internal class LinkCheckerTest { redirect(301, "https://schema.org/"), ), "https://github.com/androidx/androidx/blob/androidx-main/build.gradle" to listOf( - //todo Calculate header value on building response //Will wait some time rateLimitGH(2111), //Will wait zero time @@ -152,8 +151,9 @@ internal class LinkCheckerTest { fun fromMeta(responses: Map>): HttpClient = fromBuilders( responses.mapValues { it.value - .map> { meta -> + .map> { metaBuilder -> { req -> + val meta = metaBuilder() MockedHttpResponse.fromRequest(req, meta.first, meta.second.mapValues { h -> listOf(h.value) }) } } From 688129e003d62726975413246ff110dec58a0d93 Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Thu, 29 Dec 2022 15:01:48 +0300 Subject: [PATCH 5/6] LinkChecker: Support several types of rate limiting --- .../name/valery1707/problem/LinkChecker.kt | 52 ++++++++++++++----- .../valery1707/problem/LinkCheckerTest.kt | 15 +++++- src/test/resources/linkChecker/Demo.md | 4 +- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt index 6dc9f36..b705415 100644 --- a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt +++ b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt @@ -2,11 +2,13 @@ package name.valery1707.problem import java.net.URI import java.net.http.HttpClient +import java.net.http.HttpHeaders import java.net.http.HttpRequest import java.net.http.HttpResponse import java.nio.file.Path import java.time.Duration import java.time.Instant +import java.time.format.DateTimeFormatter import java.time.temporal.ChronoField.NANO_OF_SECOND import kotlin.io.path.ExperimentalPathApi import kotlin.io.path.PathWalkOption @@ -115,19 +117,7 @@ class LinkChecker(private val root: Path) { //Rate limiting: wait and retry in HTTP_RATE_LIMIT -> { val now = Instant.now() - val await = response.headers() - - // todo Extract to method - // https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#checking-your-rate-limit-status - .map()["x-ratelimit-reset"] - ?.asSequence() - ?.map(String::toLong)?.map(Instant::ofEpochSecond) - ?.map { Duration.between(now.with(NANO_OF_SECOND, 0), it) } - ?.map(Duration::toMillis) - ?.filter { it >= 0 } - ?.firstOrNull() - - ?: 500 + val await = response.headers().rateLimitAwait(now) ?: 500 logger.debug("Await: $await ms") Thread.sleep(await) @@ -143,6 +133,40 @@ class LinkChecker(private val root: Path) { } private val HTTP_REDIRECT = setOf(301, 302, 307, 308) - private val HTTP_RATE_LIMIT = setOf(403) + private val HTTP_RATE_LIMIT = setOf(403, 429) + + private fun HttpHeaders.rateLimitAwait(now: Instant): Long? { + val map = map() + return HTTP_RATE_LIMIT_EXTRACTORS + .flatMap { map[it.key]?.asSequence()?.map { v -> it.value(v.trim(), now) } ?: emptySequence() } + .filterNotNull() + .firstOrNull { it >= 0 } + } + + private val HTTP_RATE_LIMIT_EXTRACTORS: Map Long?> = mapOf( + // https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#checking-your-rate-limit-status + "x-ratelimit-reset" to { value, now -> + value + .toLong() + .let(Instant::ofEpochSecond) + .let { Duration.between(now.with(NANO_OF_SECOND, 0), it) } + .let(Duration::toMillis) + }, + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After + "Retry-After" to { value, now -> + if (value.isDigit()) value.toLong() + else HTTP_DATE_FORMAT + .parse(value, Instant::from) + .let { Duration.between(now.with(NANO_OF_SECOND, 0), it) } + .let(Duration::toMillis) + }, + ) + + /** + * @see Specification + */ + internal val HTTP_DATE_FORMAT = DateTimeFormatter.RFC_1123_DATE_TIME + + private fun String.isDigit(): Boolean = this.all { it.isDigit() } } } diff --git a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt index d7ee1f5..128a0d4 100644 --- a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt +++ b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt @@ -1,5 +1,6 @@ package name.valery1707.problem +import name.valery1707.problem.LinkChecker.Companion.HTTP_DATE_FORMAT import name.valery1707.problem.LinkChecker.Companion.toURI import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.fail @@ -19,6 +20,7 @@ import java.net.http.HttpResponse import java.nio.file.Path import java.time.Duration import java.time.Instant +import java.time.ZoneId import java.util.* import java.util.concurrent.CompletableFuture import java.util.concurrent.Executor @@ -59,6 +61,10 @@ internal class LinkCheckerTest { fun notFound(): ResponseMeta = { 404 to mapOf() } fun redirect(code: Int, target: String): ResponseMeta = { code to mapOf("Location" to target) } fun rateLimitGH(awaitMillis: Long): ResponseMeta = { 403 to mapOf("x-ratelimit-reset" to Instant.now().plusMillis(awaitMillis).epochSecond.toString()) } + fun rateLimitSpecSec(awaitSec: Int): ResponseMeta = { 429 to mapOf("Retry-After" to awaitSec.toString()) } + fun rateLimitSpecDate(awaitMillis: Long): ResponseMeta = { + 429 to mapOf("Retry-After" to HTTP_DATE_FORMAT.format(Instant.now().plusMillis(awaitMillis).atZone(ZoneId.systemDefault()))) + } //Check links via: curl --silent -X GET --head 'URL' val client = MockedHttpClient.fromMeta( @@ -81,6 +87,13 @@ internal class LinkCheckerTest { rateLimitGH(-1500), ok(), ), + "https://www.bearer.com/" to listOf( + // Use variant with "delay-seconds" + rateLimitSpecSec(1), + // Use variant with "http-date" + rateLimitSpecDate(100), + ok(), + ), "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt" to listOf( notFound(), ), @@ -93,7 +106,7 @@ internal class LinkCheckerTest { mapOf( "Demo.md:1:25" to "https://ya.ru -> 302 -> https://ya.ru/", "Demo.md:3:14" to "http://schema.org -> 301 -> https://schema.org/", - "Demo.md:5:14" to "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt -> 404", + "Demo.md:7:14" to "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt -> 404", ), ) } diff --git a/src/test/resources/linkChecker/Demo.md b/src/test/resources/linkChecker/Demo.md index bb911e8..f26897b 100644 --- a/src/test/resources/linkChecker/Demo.md +++ b/src/test/resources/linkChecker/Demo.md @@ -1,5 +1,7 @@ Link with name: [named](https://ya.ru). Link with name: [named](https://ya.ru/). Link inlined http://schema.org. -Link with rate limiting: https://github.com/androidx/androidx/blob/androidx-main/build.gradle +Link with rate limiting: +* https://github.com/androidx/androidx/blob/androidx-main/build.gradle +* https://www.bearer.com/ Link absent: https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt From f2e87db2cfeabc1fcef156c6822157a0b92363bc Mon Sep 17 00:00:00 2001 From: Valeriy Vyrva Date: Fri, 30 Dec 2022 10:37:32 +0300 Subject: [PATCH 6/6] LinkChecker: Use OkHttpClient instead of JVM HttpClient This library supports caching --- build.gradle.kts | 2 + gradle/libs.versions.toml | 2 + .../name/valery1707/problem/LinkChecker.kt | 55 ++++---- .../valery1707/problem/LinkCheckerTest.kt | 127 +++++------------- src/test/resources/linkChecker/Demo.md | 4 +- 5 files changed, 66 insertions(+), 124 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 01656af..f024f9d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -28,6 +28,7 @@ dependencies { implementation(libs.jackson.databind) implementation(libs.mockneat) implementation(libs.bundles.logger.api) + implementation(libs.okhttp.core) runtimeOnly(libs.logger.impl) @@ -35,6 +36,7 @@ dependencies { testImplementation(platform(libs.junit)) testImplementation("org.junit.jupiter:junit-jupiter-params") testImplementation(libs.assertj) + testImplementation(libs.okhttp.mock) jmhAnnotationProcessor(libs.jmh.ann) } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e58ff47..8d70b79 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -14,6 +14,8 @@ jmh-ann = { module = "org.openjdk.jmh:jmh-generator-annprocess", version.ref = " logger-api4j = { module = "org.slf4j:slf4j-api", version = "2.0.6" } logger-api4k = { module = "io.github.microutils:kotlin-logging-jvm", version = "3.0.4" } logger-impl = { module = "ch.qos.logback:logback-classic", version = "1.4.5" } +okhttp-core = { module = "com.squareup.okhttp3:okhttp", version = "4.10.0" } +okhttp-mock = { module = "com.github.gmazzo:okhttp-mock", version = "1.5.0" } [bundles] logger-api = ["logger.api4j", "logger.api4k"] diff --git a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt index b705415..527edf7 100644 --- a/src/main/kotlin/name/valery1707/problem/LinkChecker.kt +++ b/src/main/kotlin/name/valery1707/problem/LinkChecker.kt @@ -1,10 +1,9 @@ package name.valery1707.problem +import okhttp3.Headers +import okhttp3.OkHttpClient +import okhttp3.Request import java.net.URI -import java.net.http.HttpClient -import java.net.http.HttpHeaders -import java.net.http.HttpRequest -import java.net.http.HttpResponse import java.nio.file.Path import java.time.Duration import java.time.Instant @@ -24,7 +23,7 @@ class LinkChecker(private val root: Path) { * Сканируем все файлы из директории, ищем в тексте ссылки, проверяем их на доступность */ @OptIn(ExperimentalPathApi::class) - fun findInvalid(client: HttpClient): Map { + fun findInvalid(client: OkHttpClient): Map { val filePos2uriCheck = root .walk(PathWalkOption.FOLLOW_LINKS) .map { root.relativize(it) } @@ -104,27 +103,28 @@ class LinkChecker(private val root: Path) { null } - private fun URI.check(client: HttpClient): Pair { - val request = HttpRequest.newBuilder(this).GET().build() + private fun URI.check(client: OkHttpClient): Pair { + val request = Request.Builder().url(this.toURL()).get().build() // todo Cache return try { logger.info("Check: $this") - val response = client.send(request, HttpResponse.BodyHandlers.discarding()) - when (response.statusCode()) { - //Redirects: extract new location - in HTTP_REDIRECT -> response.statusCode() to response.headers().firstValue("Location")!!.get().toURI()!! - - //Rate limiting: wait and retry - in HTTP_RATE_LIMIT -> { - val now = Instant.now() - val await = response.headers().rateLimitAwait(now) ?: 500 - - logger.debug("Await: $await ms") - Thread.sleep(await) - check(client) + client.newCall(request).execute().use { response -> + when (response.code) { + //Redirects: extract new location + in HTTP_REDIRECT -> response.code to response.header("Location")!!.toURI()!! + + //Rate limiting: wait and retry + in HTTP_RATE_LIMIT -> { + val now = Instant.now() + val await = response.headers.rateLimitAwait(now) ?: 500 + + logger.debug("Await: $await ms") + Thread.sleep(await) + check(client) + } + + else -> response.code to response.request.url.toUri() } - - else -> response.statusCode() to response.uri() } } catch (e: Exception) { logger.error(e) { "Handle error on checking $this" } @@ -135,13 +135,10 @@ class LinkChecker(private val root: Path) { private val HTTP_REDIRECT = setOf(301, 302, 307, 308) private val HTTP_RATE_LIMIT = setOf(403, 429) - private fun HttpHeaders.rateLimitAwait(now: Instant): Long? { - val map = map() - return HTTP_RATE_LIMIT_EXTRACTORS - .flatMap { map[it.key]?.asSequence()?.map { v -> it.value(v.trim(), now) } ?: emptySequence() } - .filterNotNull() - .firstOrNull { it >= 0 } - } + private fun Headers.rateLimitAwait(now: Instant): Long? = HTTP_RATE_LIMIT_EXTRACTORS + .flatMap { values(it.key).asSequence().map { v -> it.value(v.trim(), now) } } + .filterNotNull() + .firstOrNull { it >= 0 } private val HTTP_RATE_LIMIT_EXTRACTORS: Map Long?> = mapOf( // https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#checking-your-rate-limit-status diff --git a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt index 128a0d4..7fde941 100644 --- a/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt +++ b/src/test/kotlin/name/valery1707/problem/LinkCheckerTest.kt @@ -2,34 +2,24 @@ package name.valery1707.problem import name.valery1707.problem.LinkChecker.Companion.HTTP_DATE_FORMAT import name.valery1707.problem.LinkChecker.Companion.toURI +import okhttp3.Headers.Companion.toHeaders +import okhttp3.OkHttpClient +import okhttp3.mock.MockInterceptor +import okhttp3.mock.body import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.fail import org.assertj.core.api.Assumptions.assumeThat import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.ValueSource -import java.net.Authenticator -import java.net.CookieHandler import java.net.InetSocketAddress import java.net.ProxySelector -import java.net.URI -import java.net.http.HttpClient -import java.net.http.HttpHeaders -import java.net.http.HttpRequest -import java.net.http.HttpResponse import java.nio.file.Path -import java.time.Duration import java.time.Instant import java.time.ZoneId import java.util.* -import java.util.concurrent.CompletableFuture -import java.util.concurrent.Executor -import javax.net.ssl.SSLContext -import javax.net.ssl.SSLParameters -import javax.net.ssl.SSLSession import kotlin.io.path.toPath -typealias ResponseBuilder = (HttpRequest) -> HttpResponse typealias ResponseMeta = () -> Pair> internal class LinkCheckerTest { @@ -42,10 +32,9 @@ internal class LinkCheckerTest { ) internal fun checkReal(path: Path) { assumeThat(path).isDirectory.isReadable - val client = HttpClient - .newBuilder() - .followRedirects(HttpClient.Redirect.NEVER) - .proxy(proxy) + val client = OkHttpClient.Builder() + .followRedirects(false).followSslRedirects(false) + .proxySelector(proxy) .build() val checker = LinkChecker(path) assertThat(checker.findInvalid(client)).isEmpty() @@ -67,18 +56,18 @@ internal class LinkCheckerTest { } //Check links via: curl --silent -X GET --head 'URL' - val client = MockedHttpClient.fromMeta( + val client = mockHttpClient( mapOf( - "https://ya.ru" to listOf( - redirect(302, "https://ya.ru/"), + "https://habr.com/ru/company/otus/blog/707724/comments" to mutableListOf( + redirect(302, "https://habr.com/ru/company/otus/blog/707724/comments/"), ), - "https://ya.ru/" to listOf( + "https://habr.com/ru/company/otus/blog/707724/comments/" to mutableListOf( ok(), ), - "http://schema.org" to listOf( + "http://schema.org/" to mutableListOf( redirect(301, "https://schema.org/"), ), - "https://github.com/androidx/androidx/blob/androidx-main/build.gradle" to listOf( + "https://github.com/androidx/androidx/blob/androidx-main/build.gradle" to mutableListOf( //Will wait some time rateLimitGH(2111), //Will wait zero time @@ -87,14 +76,14 @@ internal class LinkCheckerTest { rateLimitGH(-1500), ok(), ), - "https://www.bearer.com/" to listOf( + "https://www.bearer.com/" to mutableListOf( // Use variant with "delay-seconds" rateLimitSpecSec(1), // Use variant with "http-date" rateLimitSpecDate(100), ok(), ), - "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt" to listOf( + "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt" to mutableListOf( notFound(), ), ), @@ -104,7 +93,7 @@ internal class LinkCheckerTest { assertThat(checker.findInvalid(client)).containsExactlyInAnyOrderEntriesOf( mapOf( - "Demo.md:1:25" to "https://ya.ru -> 302 -> https://ya.ru/", + "Demo.md:1:25" to "https://habr.com/ru/company/otus/blog/707724/comments -> 302 -> https://habr.com/ru/company/otus/blog/707724/comments/", "Demo.md:3:14" to "http://schema.org -> 301 -> https://schema.org/", "Demo.md:7:14" to "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt -> 404", ), @@ -132,72 +121,24 @@ internal class LinkCheckerTest { ?: ProxySelector.getDefault() } - private class MockedHttpClient( - private val worker: ResponseBuilder, - ) : HttpClient() { - override fun cookieHandler(): Optional = Optional.empty() - override fun connectTimeout(): Optional = Optional.empty() - override fun followRedirects(): Redirect = Redirect.NEVER - override fun proxy(): Optional = Optional.empty() - override fun sslContext(): SSLContext = SSLContext.getDefault() - override fun sslParameters(): SSLParameters = sslContext().defaultSSLParameters - override fun authenticator(): Optional = Optional.empty() - override fun version(): Version = Version.HTTP_1_1 - override fun executor(): Optional = Optional.empty() - - override fun sendAsync( - request: HttpRequest, - responseBodyHandler: HttpResponse.BodyHandler, - pushPromiseHandler: HttpResponse.PushPromiseHandler?, - ): CompletableFuture> = sendAsync(request, responseBodyHandler) - - override fun sendAsync( - request: HttpRequest, - responseBodyHandler: HttpResponse.BodyHandler, - ): CompletableFuture> = CompletableFuture.supplyAsync { send(request, responseBodyHandler) } - - @Suppress("UNCHECKED_CAST") - override fun send(request: HttpRequest, responseBodyHandler: HttpResponse.BodyHandler): HttpResponse = - worker(request) as HttpResponse - - companion object { - fun fromMeta(responses: Map>): HttpClient = fromBuilders( - responses.mapValues { - it.value - .map> { metaBuilder -> - { req -> - val meta = metaBuilder() - MockedHttpResponse.fromRequest(req, meta.first, meta.second.mapValues { h -> listOf(h.value) }) - } - } - .toMutableList() - }, - ) - - fun fromBuilders(responses: Map>>): HttpClient = MockedHttpClient { req -> - responses[req.uri().toString()]?.removeFirst()?.invoke(req) ?: fail("Unknown response builders for ${req.uri()}") - } - } - } - - private class MockedHttpResponse( - private val request: HttpRequest, - private val statusCode: Int, - private val headers: HttpHeaders, - ) : HttpResponse { - override fun statusCode(): Int = statusCode - override fun request(): HttpRequest = request - override fun previousResponse(): Optional> = Optional.empty() - override fun headers(): HttpHeaders = headers - override fun body(): T? = null - override fun sslSession(): Optional = Optional.empty() - override fun uri(): URI = request().uri() - override fun version(): HttpClient.Version = request().version().orElse(HttpClient.Version.HTTP_1_1) - - companion object { - fun fromRequest(request: HttpRequest, statusCode: Int, headers: Map>): HttpResponse = MockedHttpResponse( - request, statusCode, HttpHeaders.of(headers) { _, _ -> true }, - ) + private companion object { + fun mockHttpClient(responses: Map>): OkHttpClient { + val interceptor = MockInterceptor() + + interceptor.addRule() + .anyTimes() + .answer { req -> + val uri = req.url.toUri() + val meta = ((responses[uri.toString()] ?: fail("Unknown URI: $uri")).removeFirstOrNull() ?: fail("Too many requests for URI: $uri"))() + okhttp3.Response.Builder() + .code(meta.first) + .headers(meta.second.toHeaders()) + .body("") + } + + return OkHttpClient.Builder() + .addInterceptor(interceptor) + .build(); } } diff --git a/src/test/resources/linkChecker/Demo.md b/src/test/resources/linkChecker/Demo.md index f26897b..362f0df 100644 --- a/src/test/resources/linkChecker/Demo.md +++ b/src/test/resources/linkChecker/Demo.md @@ -1,5 +1,5 @@ -Link with name: [named](https://ya.ru). -Link with name: [named](https://ya.ru/). +Link with name: [named](https://habr.com/ru/company/otus/blog/707724/comments). +Link with name: [named](https://habr.com/ru/company/otus/blog/707724/comments/). Link inlined http://schema.org. Link with rate limiting: * https://github.com/androidx/androidx/blob/androidx-main/build.gradle