-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfile_extractor.rb
59 lines (45 loc) · 1.64 KB
/
file_extractor.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# frozen_string_literal: true
require 'pathname'
module Chronicle
  module ETL
    # Extractor that emits the paths of files located from the `input` setting.
    #
    # Every entry of `input` must exist. Entries that are directories are
    # expanded with `dir_glob_pattern`; all other entries are used as given.
    # Glob matches are not filtered by type, so subdirectories matched by the
    # pattern are emitted alongside regular files.
    class FileExtractor < Chronicle::ETL::Extractor
      register_connector do |r|
        r.identifier = :file
        r.description = 'file or directory of files'
      end

      # One path, or a (possibly nested) list of paths, to files or directories.
      setting :input, default: ['.']
      # Glob pattern applied inside each input directory.
      setting :dir_glob_pattern, default: '**/*'
      # Exclusive size bounds, in bytes. A bound that is unset is not applied.
      setting :larger_than
      setting :smaller_than

      # Gathers the matching paths and stores them for `extract` and
      # `results_count`.
      #
      # @raise [ExtractionError] if any configured input does not exist
      def prepare
        @pathnames = gather_files
      end

      # Yields one extraction per gathered path; the extraction's data is the
      # path as a String.
      #
      # @yieldparam extraction [Chronicle::ETL::Extraction]
      def extract
        @pathnames.each do |pathname|
          yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
        end
      end

      # @return [Integer] number of paths gathered by `prepare`
      def results_count
        @pathnames.count
      end

      private

      # Builds the list of paths to extract: expands directories, removes
      # duplicates, applies the optional time and size filters, and orders the
      # result by modification time (oldest first).
      #
      # `@config.since` / `@config.until` are not declared in this class;
      # presumably they come from the parent extractor (TODO confirm).
      #
      # @return [Array<Pathname>]
      # @raise [ExtractionError] if any configured input does not exist
      def gather_files
        roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
        raise(ExtractionError, 'Input must exist') unless roots.all?(&:exist?)

        directories, files = roots.partition(&:directory?)
        files += directories.flat_map { |directory| glob_directory(directory) }
        files = files.uniq

        # Time bounds are exclusive on both ends.
        files = files.select { |f| f.mtime > @config.since } if @config.since
        files = files.select { |f| f.mtime < @config.until } if @config.until

        # pass in file sizes in bytes
        files = files.select { |f| f.size < @config.smaller_than } if @config.smaller_than
        files = files.select { |f| f.size > @config.larger_than } if @config.larger_than

        # TODO: incorporate sort argument
        files.sort_by(&:mtime)
      end

      # Expands `dir_glob_pattern` inside `directory`.
      #
      # The directory is handed to Dir.glob through `base:` instead of being
      # spliced into the pattern, so glob metacharacters in the directory name
      # (`[`, `]`, `{`, `}`, `*`, `?`) are taken literally rather than being
      # interpreted as part of the pattern.
      #
      # @param directory [Pathname] an existing directory
      # @return [Array<Pathname>] matches, each prefixed with `directory`
      def glob_directory(directory)
        base = directory.to_path
        # File.join used to fold a leading "/" of the pattern into the
        # directory; strip it so such patterns stay relative to the directory.
        pattern = @config.dir_glob_pattern.sub(%r{\A/+}, '')

        Dir.glob(pattern, base: base).map { |relative| Pathname.new(File.join(base, relative)) }
      end
    end
  end
end