-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper.rb
65 lines (50 loc) · 1.22 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# frozen_string_literal: true
# #!/bin/env ruby
# encoding: utf-8
require 'scraped'
require 'scraperwiki'
require 'nokogiri'
# require 'open-uri/cached'
# OpenURI::Cache.cache_path = '.cache'
require 'scraped_page_archive/open-uri'
# The member listing page: exposes every row of the `#data-anggota`
# table body as a MemberRow fragment.
class MembersPage < Scraped::HTML
  decorator Scraped::Response::Decorator::CleanUrls

  # One MemberRow per `<tr>` found under `#data-anggota tbody`.
  field :members do
    rows = noko.css('#data-anggota tbody tr')
    rows.map { |row| fragment(row => MemberRow) }
  end
end
# A single table row from the member listing. Fields are read from the
# row's second and third `<td>` cells.
class MemberRow < Scraped::HTML
  # Last path segment of the link href found in the second cell.
  field :id do
    href = cells[1].css('a/@href').to_s
    href.split('/').last
  end

  # Text of the link(s) in the third cell.
  field :name do
    cells[2].css('a').inner_text
  end

  # Second `<br>`-separated chunk of the third cell's inner HTML.
  field :faction do
    detail_parts[1]
  end

  # Third `<br>`-separated chunk of the third cell's inner HTML.
  field :area do
    detail_parts[2]
  end

  # `src` attribute of the image in the second cell.
  field :image do
    cells[1].css('img/@src').inner_text
  end

  # URL this row was scraped from.
  field :source do
    url
  end

  private

  # All `<td>` nodes of this row.
  def cells
    noko.css('td')
  end

  # Raw inner HTML of the third cell, cut at each literal `<br>`.
  # NOTE(review): the chunks are returned as-is (no whitespace or entity
  # clean-up) — confirm that is what downstream consumers expect.
  def detail_parts
    third_cell = cells[2]
    third_cell.inner_html.split('<br>')
  end
end
# Scrape the member listing, tag every record with `term: 18`, and store
# the result in the ScraperWiki SQLite `data` table keyed on `:id`.
url = 'http://dpr.go.id/en/anggota'
page = MembersPage.new(response: Scraped::Request.new(url: url).response)
data = page.members.map { |mem| mem.to_h.merge(term: 18) }

# With MORPH_DEBUG set, print each record with empty values removed and
# its keys in sorted order.
if ENV['MORPH_DEBUG']
  data.each do |mem|
    puts mem.reject { |_, v| v.to_s.empty? }.sort_by { |k, _| k }.to_h
  end
end

# Rebuild the table from scratch on every run. `IF EXISTS` turns the
# expected "no table yet" case into a no-op, so nothing has to be rescued
# for it. Any other failure is reported on stderr instead of being
# silently discarded, and stays non-fatal as before.
begin
  ScraperWiki.sqliteexecute('DROP TABLE IF EXISTS data')
rescue StandardError => e
  warn "Could not drop existing data table: #{e.message}"
end
ScraperWiki.save_sqlite([:id], data)