Skip to content

Commit

Permalink
fix(crawler): rewrite "lg" area code crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
kkkrist committed Nov 6, 2020
1 parent d929af8 commit 4adc882
Showing 1 changed file with 43 additions and 80 deletions.
123 changes: 43 additions & 80 deletions packages/crawler/lib/lg.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,6 @@
const fetch = require('node-fetch')
const jsdom = require('jsdom').JSDOM

// Lookup table: German month name -> 1-based month number.
const numberStrings = {
  Januar: 1,
  Februar: 2,
  März: 3,
  April: 4,
  Mai: 5,
  Juni: 6,
  Juli: 7,
  August: 8,
  September: 9,
  Oktober: 10,
  November: 11,
  Dezember: 12
}

/**
 * Converts a value to a number.
 *
 * Numbers and non-blank numeric strings are converted with `Number()`.
 * Any other value is stringified and searched (case-insensitively, as a
 * substring) for one of the German month names in `numberStrings`; the
 * matching month number is returned.
 *
 * @param {*} val - Value to convert, e.g. `'6'`, `6` or `'Oktober'`.
 * @returns {number} The numeric value, or the month number (1-12).
 * @throws {Error} If `val` is neither numeric nor contains a known month name.
 */
const getInt = val => {
  const isNumber = typeof val === 'number' && !Number.isNaN(val)

  // Blank strings are rejected explicitly: Number('') and Number('  ') are 0,
  // which would otherwise be returned silently as a valid result.
  const isNumericString =
    typeof val === 'string' &&
    val.trim() !== '' &&
    !Number.isNaN(Number(val))

  if (isNumber || isNumericString) {
    return Number(val)
  }

  const text = String(val)

  const match = Object.entries(numberStrings).find(([name]) =>
    new RegExp(name, 'i').test(text)
  )

  if (match) {
    return match[1]
  }

  throw new Error(`Couldn't convert string to number: ${val}`)
}

module.exports = () =>
Promise.all([
jsdom.fromURL('https://spezial.lklg.net/?p=64'),
Expand All @@ -56,59 +25,53 @@ module.exports = () =>
}
]
}
]) => {
const elements = dom.window.document.querySelectorAll(
'.row:not(.container) > div > *'
)

let index = 0
let content = ''

while (index < elements.length && elements[index].tagName !== 'TABLE') {
content += elements[index].textContent
index++
}

const dateMatch = content.match(
/\+\+\+ Update ([0-9]+)\. ([A-Za-z]+) ([0-9]+)/
)

const infectedMatch = content.match(
/gesamt[\D]+ ([0-9]+) [\D]+ gemeldet/i
)

const recoveredMatch = content.match(/([0-9]+)[\D]+ genesen/)

if (!dateMatch) {
throw new Error(`Couldn't parse date string "${content[1]}"`)
}
]) =>
[...dom.window.document.querySelectorAll('tr')]
.slice(2)
.reduce((acc, row) => {
const dateMatch = row.children[0].textContent.match(
/([0-9]+)\.([0-9]+)\.([0-9]+)/
)

const infectedMatch = row.children[3].textContent.match(/([0-9.]+)/)

const recoveredMatch = row.children[4].textContent.match(/([0-9.]+)/)

if (!dateMatch) {
throw new Error(
`Couldn't parse date string "${row.children[0].textContent}"`
)
}

if (!infectedMatch) {
throw new Error(`Couldn't parse infected string "${content[2]}"`)
}
if (!infectedMatch) {
throw new Error(
`Couldn't parse infected string "${row.children[3].textContent}"`
)
}

if (!recoveredMatch) {
throw new Error(`Couldn't parse recovered string "${content[4]}"`)
}
if (!recoveredMatch) {
throw new Error(
`Couldn't parse recovered string "${row.children[4].textContent}"`
)
}

const entry = {
areacode: 'lg',
date: new Date(
`${dateMatch[3]}-${getInt(dateMatch[2])}-${dateMatch[1]}`
).toISOString(),
deaths,
infected: Number(infectedMatch[1]),
quarantined: null,
recovered: Number(recoveredMatch[1])
}
const entry = {
areacode: 'lg',
date: `${dateMatch[3]}-${dateMatch[2]}-${dateMatch[1]}T00:00:00.000Z`,
deaths,
infected: Number(infectedMatch[1]),
quarantined: null,
recovered: Number(recoveredMatch[1])
}

return [
{
...entry,
active: entry.infected - entry.recovered - entry.deaths
}
]
},
return [
...acc,
{
...entry,
active: entry.infected - entry.recovered - entry.deaths
}
]
}, []),
error => {
throw error
}
Expand Down

0 comments on commit 4adc882

Please sign in to comment.