diff --git a/skrape-core/src/main/kotlin/nolambda/skrape/Skrape.kt b/skrape-core/src/main/kotlin/nolambda/skrape/Skrape.kt
index 14ef19d..68170b0 100644
--- a/skrape-core/src/main/kotlin/nolambda/skrape/Skrape.kt
+++ b/skrape-core/src/main/kotlin/nolambda/skrape/Skrape.kt
@@ -10,7 +10,7 @@ class Skrape(
     private val enableLog: Boolean = false
 ) {
 
-    fun request(page: Page): T {
+    fun request(page: Page, args: Map = emptyMap()): T {
         log { "Requesting $this" }
         return parser.adapt(page).also {
             log { "Result $it" }
diff --git a/skrape-core/src/main/kotlin/nolambda/skrape/nodes/Node.kt b/skrape-core/src/main/kotlin/nolambda/skrape/nodes/Node.kt
index 1ab0064..fc2e5ae 100644
--- a/skrape-core/src/main/kotlin/nolambda/skrape/nodes/Node.kt
+++ b/skrape-core/src/main/kotlin/nolambda/skrape/nodes/Node.kt
@@ -28,7 +28,7 @@ typealias ElementBody = ParentElement.() -> Unit
 /* > Parent Elements */
 /* --------------------------------------------------- */
 
-class Page(
+data class Page(
     val pageInfo: PageInfo,
     override var name: String = "",
     @Transient override val body: ElementBody
@@ -37,49 +37,40 @@ class Page(
     constructor(path: String, baseUrl: String = "", body: ElementBody) : this(PageInfo(path, baseUrl), body = body)
     constructor(file: File, baseUrl: String = "", body: ElementBody) : this(file.path, baseUrl, body)
 
-    override fun toString(): String = "Page(pageInfo=$pageInfo, name='$name', body=$body)"
-
     override var type: String = ElementName.ELEMENT_PAGE
 }
 
-class Query(
+data class Query(
     val cssSelector: String,
     override var name: String = "",
     @Transient override val body: ElementBody
 ) : ParentElement() {
     override var type: String = ElementName.ELEMENT_QUERY
-    override fun toString(): String = "Query(name='$name', cssSelector='$cssSelector')"
 }
 
-class Container(
+data class Container(
     override var name: String = "",
     @Transient override val body: ElementBody
 ) : ParentElement() {
     override var type: String = ElementName.ELEMENT_CONTAINER
-    override fun toString(): String = "Container(name='$name', body=$body)"
 }
 
 /* --------------------------------------------------- */
 /* > Child Elements */
 /* --------------------------------------------------- */
 
-class Attr(
+data class Attr(
     override var name: String = "",
     val attrName: String
 ) : SkrapeElemenet() {
     override var type: String = ElementName.ELEMENT_ATTR
-    override fun toString(): String = "Attr(name='$name', attrName='$attrName')"
 }
 
-class Value(
+data class Value(
     override var name: String = "",
     @Transient val clazz: Class,
     val query: String = ""
-    ) : SkrapeElemenet() {
+) : SkrapeElemenet() {
     override var type: String = ElementName.ELEMENT_VALUE
 
-    override fun toString(): String {
-        return "Value(name='$name', clazz=$clazz, query='$query')"
-    }
-
 }
diff --git a/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PageTransformer.kt b/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PageTransformer.kt
new file mode 100644
index 0000000..e11a27e
--- /dev/null
+++ b/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PageTransformer.kt
@@ -0,0 +1,9 @@
+package nolambda.skrape.transformer
+
+import nolambda.skrape.nodes.Page
+
+/** Produces a [Page] derived from the one it is given. */
+interface PageTransformer {
+    /** Returns the transformed version of [page]. */
+    fun transform(page: Page): Page
+}
\ No newline at end of file
diff --git a/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PlaceholderTransformer.kt b/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PlaceholderTransformer.kt
new file mode 100644
index 0000000..a745265
--- /dev/null
+++ b/skrape-core/src/main/kotlin/nolambda/skrape/transformer/PlaceholderTransformer.kt
@@ -0,0 +1,70 @@
+package nolambda.skrape.transformer
+
+import nolambda.skrape.nodes.*
+
+/**
+ * [PageTransformer] that substitutes `{{key}}` placeholders in the page path and in
+ * [Query] CSS selectors with the values supplied in [args].
+ *
+ * @param args replacement text keyed by placeholder name (the text between `{{` and `}}`)
+ */
+class PlaceholderTransformer(
+    private val args: Map
+) : PageTransformer {
+
+    companion object {
+        // Non-greedy capture so each `{{key}}` is matched on its own, even when a
+        // string contains several placeholders.
+        private val PLACEHOLDER_PATTERN = Regex("\\{\\{(.*?)\\}\\}")
+    }
+
+    /**
+     * Calls `evaluate()` on [page], then returns a copy whose path has its placeholders
+     * replaced and whose children are the transformed children of [page].
+     *
+     * @throws IllegalArgumentException if a placeholder has no entry in [args]
+     */
+    override fun transform(page: Page): Page {
+        page.evaluate()
+
+        val pageInfo = page.pageInfo
+        return page.copy(pageInfo = pageInfo.copy(path = pageInfo.path.replacePlaceholder())).apply {
+            setNewChildren(transformChildren(page.children))
+        }
+    }
+
+    /**
+     * Maps every [Query] in [children] to a copy with its selector resolved, recursing into
+     * that query's own children.
+     */
+    private fun transformChildren(children: List): List {
+        return children.map {
+            when (it) {
+                is Query -> it.copy(cssSelector = it.cssSelector.replacePlaceholder()).also { query ->
+                    query.setNewChildren(transformChildren(it.children))
+                }
+                // NOTE(review): every non-Query element is returned untouched, so placeholders
+                // nested under e.g. a Container are not substituted - confirm this is intended.
+                else -> it
+            }
+        }
+    }
+
+    /** Replaces the receiver's current children with [newChildren]. */
+    private fun ParentElement.setNewChildren(newChildren: List) {
+        children.clear()
+        children.addAll(newChildren)
+    }
+
+    /**
+     * Returns the receiver with every `{{key}}` occurrence replaced by `args[key]`.
+     * A string without placeholders is returned unchanged.
+     *
+     * @throws IllegalArgumentException if any placeholder key is missing from [args]
+     */
+    private fun String.replacePlaceholder(): String =
+        PLACEHOLDER_PATTERN.replace(this) { match ->
+            args[match.groupValues[1]]
+                ?: throw IllegalArgumentException("Unfulfilled placeholder on: $this")
+        }
+}
\ No newline at end of file
diff --git a/skrape-core/src/test/kotlin/nolamda/skrape/PlaceholderTransformerSpec.kt b/skrape-core/src/test/kotlin/nolamda/skrape/PlaceholderTransformerSpec.kt
new file mode 100644
index 0000000..afd8a22
--- /dev/null
+++ b/skrape-core/src/test/kotlin/nolamda/skrape/PlaceholderTransformerSpec.kt
@@ -0,0 +1,52 @@
+package nolamda.skrape
+
+import io.kotlintest.matchers.shouldBe
+import io.kotlintest.matchers.shouldThrow
+import io.kotlintest.specs.StringSpec
+import nolambda.skrape.nodes.*
+import nolambda.skrape.transformer.PlaceholderTransformer
+import kotlin.to
+
+class PlaceholderTransformerSpec : StringSpec({
+
+    val transformer = PlaceholderTransformer(mapOf(
+        "ngasal" to "tweet",
+        "COBA" to "a"
+    ))
+
+    val page = Page("https://ngasal.com/{{ngasal}}") {
+        query("td {{ngasal}}") {
+            "place" to text()
+            query("td {{COBA}}") {
+                "another" to text()
+                "place" to attr("href")
+            }
+        }
+    }
+
+    val resultPage = transformer.transform(page)
+
+    "it should replace path placeholder" {
+        val expectedPath = "https://ngasal.com/tweet"
+        resultPage.pageInfo.path shouldBe expectedPath
+    }
+
+    "it should replace css selector placeholder" {
+        val expectedSelector = "td tweet"
+
+        val query = resultPage.children.first() as Query
+        query.cssSelector shouldBe expectedSelector
+    }
+
+    "it should replace every placeholder in one string" {
+        val multiPage = Page("https://ngasal.com/{{ngasal}}/{{COBA}}") {}
+        transformer.transform(multiPage).pageInfo.path shouldBe "https://ngasal.com/tweet/a"
+    }
+
+    "it should throw if there's unfulfilled" {
+        val failingTransformer = PlaceholderTransformer(emptyMap())
+        shouldThrow {
+            failingTransformer.transform(page)
+        }
+    }
+})
\ No newline at end of file