Skip to content

Commit

Permalink
Merge pull request #8 from esafirm/placeholder_transformer
Browse files Browse the repository at this point in the history
Placeholder transformer
  • Loading branch information
esafirm authored May 28, 2020
2 parents 705b596 + be7ff77 commit bdc452f
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 16 deletions.
2 changes: 1 addition & 1 deletion skrape-core/src/main/kotlin/nolambda/skrape/Skrape.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Skrape<out T : SkrapeResult>(
private val enableLog: Boolean = false
) {

fun request(page: Page): T {
fun request(page: Page, args: Map<String, String> = emptyMap()): T {
log { "Requesting $this" }
return parser.adapt(page).also {
log { "Result $it" }
Expand Down
21 changes: 6 additions & 15 deletions skrape-core/src/main/kotlin/nolambda/skrape/nodes/Node.kt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ typealias ElementBody = ParentElement.() -> Unit
/* > Parent Elements */
/* --------------------------------------------------- */

class Page(
data class Page(
val pageInfo: PageInfo,
override var name: String = "",
@Transient override val body: ElementBody
Expand All @@ -37,49 +37,40 @@ class Page(
constructor(path: String, baseUrl: String = "", body: ElementBody) : this(PageInfo(path, baseUrl), body = body)
constructor(file: File, baseUrl: String = "", body: ElementBody) : this(file.path, baseUrl, body)

override fun toString(): String = "Page(pageInfo=$pageInfo, name='$name', body=$body)"

override var type: String = ElementName.ELEMENT_PAGE
}


class Query(
data class Query(
val cssSelector: String,
override var name: String = "",
@Transient override val body: ElementBody
) : ParentElement() {
override var type: String = ElementName.ELEMENT_QUERY
override fun toString(): String = "Query(name='$name', cssSelector='$cssSelector')"
}

class Container(
data class Container(
override var name: String = "",
@Transient override val body: ElementBody
) : ParentElement() {
override var type: String = ElementName.ELEMENT_CONTAINER
override fun toString(): String = "Container(name='$name', body=$body)"
}

/* --------------------------------------------------- */
/* > Child Elements */
/* --------------------------------------------------- */

class Attr(
data class Attr(
override var name: String = "",
val attrName: String
) : SkrapeElemenet() {
override var type: String = ElementName.ELEMENT_ATTR
override fun toString(): String = "Attr(name='$name', attrName='$attrName')"
}

class Value<T : Any>(
data class Value<T : Any>(
override var name: String = "",
@Transient val clazz: Class<T>,
val query: String = ""
) : SkrapeElemenet() {
) : SkrapeElemenet() {
override var type: String = ElementName.ELEMENT_VALUE
override fun toString(): String {
return "Value(name='$name', clazz=$clazz, query='$query')"
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package nolambda.skrape.transformer

import nolambda.skrape.nodes.Page

/**
 * Contract for a transformation step that receives a [Page] and returns a [Page].
 *
 * NOTE(review): the type parameter [T] is not referenced by [transform] or anywhere
 * else in this declaration — confirm whether it is still needed.
 */
interface PageTransformer<T> {
/**
 * Transforms the given page.
 *
 * @param page the page to transform
 * @return the page resulting from the transformation
 */
fun transform(page: Page): Page
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package nolambda.skrape.transformer

import nolambda.skrape.nodes.*

/**
 * [PageTransformer] that substitutes `{{key}}` placeholders with the values found in [args].
 *
 * Placeholders are resolved in the page path and in the CSS selector of every [Query]
 * that is a direct child of the page or nested inside another [Query]. All other
 * element types are passed through untouched.
 *
 * @property args maps a placeholder key (the text between `{{` and `}}`) to its replacement
 */
class PlaceholderTransformer(
    private val args: Map<String, String>
) : PageTransformer<Page> {

    /**
     * Returns a copy of [page] whose path and query selectors have their placeholders replaced.
     *
     * @param page the page to transform
     * @return a copy of [page] carrying the substituted path and the transformed children
     * @throws IllegalArgumentException if any placeholder is still present after substitution
     */
    override fun transform(page: Page): Page {
        // NOTE(review): presumably this populates page.children by running the body — confirm.
        page.evaluate()

        val pageInfo = page.pageInfo
        return page.copy(pageInfo = pageInfo.copy(path = pageInfo.path.replacePlaceholder())).apply {
            setNewChildren(transformChildren(page.children))
        }
    }

    /**
     * Maps [children] to a new list in which every [Query] is replaced by a copy with a
     * substituted selector and recursively transformed children.
     *
     * NOTE(review): children of non-[Query] parents (e.g. `Container`) are not traversed —
     * confirm whether that is intended.
     */
    private fun transformChildren(children: List<SkrapeElemenet>): List<SkrapeElemenet> {
        return children.map { element ->
            when (element) {
                is Query -> element.copy(cssSelector = element.cssSelector.replacePlaceholder()).also { query ->
                    query.setNewChildren(transformChildren(element.children))
                }
                else -> element
            }
        }
    }

    /** Replaces the receiver's current children with [newChildren]. */
    private fun ParentElement.setNewChildren(newChildren: List<SkrapeElemenet>) {
        children.clear()
        children.addAll(newChildren)
    }

    /**
     * Replaces every `{{key}}` occurrence in the receiver with `args[key]`.
     *
     * Each placeholder is matched individually, so a string may contain any number of them.
     * Placeholders whose key is missing from [args] are left in place and then reported.
     *
     * @return the receiver with all placeholders substituted
     * @throws IllegalArgumentException if a placeholder remains in the result
     */
    private fun String.replacePlaceholder(): String {
        // The lambda form of Regex.replace inserts the returned text literally.
        val finalResult = PLACEHOLDER_PATTERN.replace(this) { match ->
            args[match.groupValues[1]] ?: match.value
        }

        // Check if there's un-fulfilled placeholder
        if (PLACEHOLDER_PATTERN.containsMatchIn(finalResult)) {
            throw IllegalArgumentException("Unfulfilled placeholder on: $this")
        }

        return finalResult
    }

    companion object {
        // Non-greedy and unanchored so each `{{key}}` yields its own match; the previous
        // greedy, whole-string pattern could only ever capture the last placeholder.
        private val PLACEHOLDER_PATTERN = Regex("""\{\{(.*?)\}\}""")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package nolamda.skrape

import io.kotlintest.matchers.shouldBe
import io.kotlintest.matchers.shouldThrow
import io.kotlintest.specs.StringSpec
import nolambda.skrape.nodes.*
import nolambda.skrape.transformer.PlaceholderTransformer
import kotlin.to

/**
 * Spec for [PlaceholderTransformer]: checks placeholder substitution in the page path
 * and in the first query's CSS selector, and the failure raised when a key is missing.
 */
class PlaceholderTransformerSpec : StringSpec({

    val replacements = mapOf(
        "ngasal" to "tweet",
        "COBA" to "a"
    )
    val transformer = PlaceholderTransformer(replacements)

    // Fixture with a placeholder in the path, in a top-level query and in a nested query.
    val page = Page("https://ngasal.com/{{ngasal}}") {
        query("td {{ngasal}}") {
            "place" to text()
            query("td {{COBA}}") {
                "another" to text()
                "place" to attr("href")
            }
        }
    }

    val resultPage = transformer.transform(page)

    "it should replace path placeholder" {
        resultPage.pageInfo.path shouldBe "https://ngasal.com/tweet"
    }

    "it should replace css selector placeholder" {
        val firstQuery = resultPage.children.first() as Query
        firstQuery.cssSelector shouldBe "td tweet"
    }

    "it should throw if there's unfulfilled" {
        // No replacements are supplied, so every placeholder in the fixture stays unresolved.
        shouldThrow<IllegalArgumentException> {
            PlaceholderTransformer(emptyMap()).transform(page)
        }
    }
})

0 comments on commit bdc452f

Please sign in to comment.