Skip to content

Commit

Permalink
fix(crawler/fl): adapt list format introduced on 21/01/29
Browse files Browse the repository at this point in the history
  • Loading branch information
kkkrist committed Jan 29, 2021
1 parent 8a62a79 commit 3286a2d
Showing 1 changed file with 24 additions and 11 deletions.
35 changes: 24 additions & 11 deletions packages/crawler/lib/fl.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,26 @@ const JSDOM = require('jsdom').JSDOM
const fetchOptions = require('./fetch-options.json')

const rDate = /^([0-9]+)\.([0-9]+)\.([0-9]+)$/
const rDeaths = /([0-9]+)[\D]+Verst(?:or|ro)?ben/i
const rInfected = /([0-9]+)[\D]+Infizierte/
const rQuarantined = /([0-9]+)[\D]+ (?<!Lehrer )in Qua?rantäne/
const rRecovered = /([0-9]+)[\D]+gen?en?sen/
// Each constant below is a list of patterns that `matcher` tries in order;
// the first pattern that matches wins.
// Index 0 is the label-first form (e.g. "verstorben: 12"), index 1 is the
// number-first form (e.g. "12 ... Verstorben"). In both, capture group 1
// holds the number.
const rDeaths = [/verstorben: ([0-9]+)/i, /([0-9]+)[\D]+Verst(?:or|ro)?ben/i]
const rInfected = [/Positive gesamt: ([0-9]+)/i, /([0-9]+)[\D]+Infizierte/]
const rQuarantined = [
/Quarantänefälle: ([0-9]+)/i,
/([0-9]+)[\D]+ (?<!Lehrer )in Qua?rantäne/
]
const rRecovered = [/genesen: ([0-9]+)/i, /([0-9]+)[\D]+gen?en?sen/]

/**
 * Match `str` against a single pattern or against a list of patterns.
 *
 * @param {string} str - Text to search.
 * @param {RegExp|RegExp[]} r - One pattern, or several patterns tried in order.
 * @returns {RegExpMatchArray|null|undefined} The result of the first pattern
 *   that matches. `null` when nothing matches; `undefined` when `r` is an
 *   empty list.
 */
const matcher = (str, r) => {
  // A single pattern needs no iteration.
  if (!Array.isArray(r)) {
    return str.match(r)
  }

  // Try the patterns in order and stop at the first hit. `found` keeps the
  // result of the last attempt so a total miss yields `null`.
  let found
  for (const pattern of r) {
    found = str.match(pattern)
    if (found) {
      break
    }
  }

  return found
}

const getMatch = (el, regex, isOptional) => {
let match = el.textContent.match(regex)
let match = matcher(el.textContent, regex)

if (match) {
return match
Expand All @@ -20,7 +33,7 @@ const getMatch = (el, regex, isOptional) => {
let nextEl = el.previousElementSibling || el.parentElement

while (nextEl && !match) {
match = nextEl.textContent.match(regex)
match = matcher(nextEl.textContent, regex)
nextEl = nextEl.previousElementSibling || nextEl.parentElement
}

Expand Down Expand Up @@ -62,17 +75,17 @@ const getRecord = el => {
}

const reducer = (acc, el) => {
if (el.childElementCount > 0) {
if (el.nodeName !== 'P' && el.childElementCount > 0) {
return [...el.childNodes].reduce(reducer, acc)
}

const str = el.textContent

if (
rInfected.test(str) &&
rRecovered.test(str) &&
rQuarantined.test(str) &&
rDeaths.test(str)
matcher(str, rInfected) &&
matcher(str, rRecovered) &&
matcher(str, rQuarantined) &&
matcher(str, rDeaths)
) {
return [...acc, getRecord(el)]
}
Expand Down

0 comments on commit 3286a2d

Please sign in to comment.