Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit lines #101

Merged
merged 11 commits into from
Dec 17, 2014
1 change: 0 additions & 1 deletion lib/csvlint.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
require "csvlint/version"
require 'csv'
require 'open-uri'
require 'mime/types'
Expand Down
48 changes: 15 additions & 33 deletions lib/csvlint/error_collector.rb
Original file line number Diff line number Diff line change
@@ -1,43 +1,25 @@
module Csvlint

module ErrorCollector

def build_message(type, category, row, column, content, constraints)
Csvlint::ErrorMessage.new({
:type => type,
:category => category,
:row => row,
:column => column,
:content => content,
:constraints => constraints
})
attr_reader :errors, :warnings, :info_messages

# Records an error-level message.
#
# Wraps the supplied details in a Csvlint::ErrorMessage and appends it to
# the @errors collection. Only +type+ is required; the remaining details
# default to nil (or an empty hash for +constraints+).
#
# Returns the @errors collection after the append.
def build_errors(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
  message = Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
  @errors << message
end

MESSAGE_LEVELS = [
:errors,
:warnings,
:info_messages
]

MESSAGE_LEVELS.each do |level|

attr_reader level

define_method "build_#{level}" do |type, category = nil, row = nil, column = nil, content = nil, constraints = {}|
instance_variable_get("@#{level}") << build_message(type, category, row, column, content, constraints)
end

# Records a warning-level message.
#
# Builds a Csvlint::ErrorMessage from the supplied details and appends it
# to the @warnings collection. Everything except +type+ is optional.
#
# Returns the @warnings collection after the append.
def build_warnings(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
  @warnings.push(
    Csvlint::ErrorMessage.new(type, category, row, column, content, constraints)
  )
end

# Records an informational message.
#
# Builds a Csvlint::ErrorMessage from the supplied details and appends it
# to the @info_messages collection. Everything except +type+ is optional.
#
# Returns the @info_messages collection after the append.
def build_info_messages(type, category = nil, row = nil, column = nil, content = nil, constraints = {})
  details = [type, category, row, column, content, constraints]
  @info_messages << Csvlint::ErrorMessage.new(*details)
end

# Reports whether validation passed, i.e. no error-level messages have
# been collected. Warnings and info messages are not consulted.
def valid?
  errors.size.zero?
end

def reset
MESSAGE_LEVELS.each do |level|
instance_variable_set("@#{level}", [])
end
@errors = []
@warnings = []
@info_messages = []
end

end
end
end
19 changes: 9 additions & 10 deletions lib/csvlint/error_message.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
module Csvlint

class ErrorMessage

attr_reader :type, :category, :row, :column, :content, :constraints

def initialize(params)
params.each do |key, value|
self.instance_variable_set("@#{key}".to_sym, value)
end

# Stores the details of a single validation message.
#
# Each argument is kept verbatim in the instance variable of the same
# name, which the class exposes through its attr_reader declarations.
def initialize(type, category, row, column, content, constraints)
  @type, @category = type, category
  @row, @column = row, column
  @content = content
  @constraints = constraints
end

end

end
end
92 changes: 58 additions & 34 deletions lib/csvlint/types.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,79 @@
require 'active_support/core_ext/time/conversions'

module Csvlint

module Types

SIMPLE_FORMATS = {
'string' => lambda { |value, constraints| value },
'numeric' => lambda do |value, constraints|
begin
Integer value
rescue ArgumentError
Float value
'string' => lambda { |value| true },
'numeric' => lambda { |value| value.strip[/\A[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?\z/] },
'uri' => lambda do |value|
if value.strip[/\Ahttps?:/]
u = URI.parse(value)
u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
end
end,
'uri' => lambda do |value, constraints|
u = URI.parse value
raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
u
end
}

def self.date_format(klass = DateTime, value, type)
date = klass.strptime(value, klass::DATE_FORMATS[type])
raise ArgumentError unless date.to_formatted_s(type) == value

# Checks whether +value+ is written exactly in the given date/time format.
#
# klass   - class providing strptime (callers in this file pass Date or Time)
# value   - the string to test
# format  - strptime/strftime format string
# pattern - Regexp used as a cheap pre-filter before any parsing happens
#
# Returns nil when +value+ does not match +pattern+. Otherwise the value
# is parsed and re-formatted, and the result is true only when that round
# trip reproduces +value+ exactly. Errors raised by strptime are not
# rescued here.
def self.date_format(klass, value, format, pattern)
  return unless value[pattern]

  parsed = klass.strptime(value, format)
  parsed.strftime(format) == value
end

def self.included(base)
Time::DATE_FORMATS[:iso8601] = "%Y-%m-%dT%H:%M:%SZ"
Time::DATE_FORMATS[:hms] = "%H:%M:%S"

Date::DATE_FORMATS.each do |type|
SIMPLE_FORMATS["date_#{type.first}"] = lambda do |value, constraints|
date_format(Date, value, type.first)
[
[ :db, "%Y-%m-%d",
/\A\d{4,}-\d\d-\d\d\z/],
[ :number, "%Y%m%d",
/\A\d{8}\z/],
[ :short, "%e %b",
/\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')})\z/],
[ :rfc822, "%e %b %Y",
/\A[ \d]\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d{4,}\z/],
[ :long, "%B %e, %Y",
/\A(?:#{Date::MONTHNAMES.join('|')}) [ \d]\d, \d{4,}\z/],
].each do |type,format,pattern|
SIMPLE_FORMATS["date_#{type}"] = lambda do |value|
date_format(Date, value, format, pattern)
end
end

Time::DATE_FORMATS.each do |type|
SIMPLE_FORMATS["dateTime_#{type.first}"] = lambda do |value, constraints|
date_format(Time, value, type.first)

# strptime doesn't support widths like %9N, unlike strftime.
# @see http://ruby-doc.org/stdlib-2.0/libdoc/date/rdoc/DateTime.html
[
[ :time, "%H:%M",
/\A\d\d:\d\d\z/],
[ :hms, "%H:%M:%S",
/\A\d\d:\d\d:\d\d\z/],
[ :db, "%Y-%m-%d %H:%M:%S",
/\A\d{4,}-\d\d-\d\d \d\d:\d\d:\d\d\z/],
[ :iso8601, "%Y-%m-%dT%H:%M:%SZ",
/\A\d{4,}-\d\d-\d\dT\d\d:\d\d:\d\dZ\z/],
[ :number, "%Y%m%d%H%M%S",
/\A\d{14}\z/],
[ :nsec, "%Y%m%d%H%M%S%N",
/\A\d{23}\z/],
[ :short, "%d %b %H:%M",
/\A\d\d (?:#{Date::ABBR_MONTHNAMES.join('|')}) \d\d:\d\d\z/],
[ :long, "%B %d, %Y %H:%M",
/\A(?:#{Date::MONTHNAMES.join('|')}) \d\d, \d{4,} \d\d:\d\d\z/],
].each do |type,format,pattern|
SIMPLE_FORMATS["dateTime_#{type}"] = lambda do |value|
date_format(Time, value, format, pattern)
end
end
end

TYPE_VALIDATIONS = {
'http://www.w3.org/2001/XMLSchema#string' => SIMPLE_FORMATS['string'],
'http://www.w3.org/2001/XMLSchema#string' => lambda { |value, constraints| value },
'http://www.w3.org/2001/XMLSchema#int' => lambda { |value, constraints| Integer value },
'http://www.w3.org/2001/XMLSchema#integer' => lambda { |value, constraints| Integer value },
'http://www.w3.org/2001/XMLSchema#float' => lambda { |value, constraints| Float value },
'http://www.w3.org/2001/XMLSchema#double' => lambda { |value, constraints| Float value },
'http://www.w3.org/2001/XMLSchema#anyURI' => SIMPLE_FORMATS['uri'],
'http://www.w3.org/2001/XMLSchema#anyURI' => lambda do |value, constraints|
u = URI.parse value
raise ArgumentError unless u.kind_of?(URI::HTTP) || u.kind_of?(URI::HTTPS)
u
end,
'http://www.w3.org/2001/XMLSchema#boolean' => lambda do |value, constraints|
return true if ['true', '1'].include? value
return false if ['false', '0'].include? value
Expand Down Expand Up @@ -100,14 +125,13 @@ def self.included(base)
d = Date.strptime(value, date_pattern)
raise ArgumentError unless d.strftime(date_pattern) == value
d
end,
end,
'http://www.w3.org/2001/XMLSchema#gYearMonth' => lambda do |value, constraints|
date_pattern = constraints["datePattern"] || "%Y-%m"
d = Date.strptime(value, date_pattern)
raise ArgumentError unless d.strftime(date_pattern) == value
d
end
end,
}
end

end
48 changes: 24 additions & 24 deletions lib/csvlint/validate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ def initialize(source, dialect = nil, schema = nil)

@supplied_dialect = dialect != nil

@dialect = dialect_defaults = {
@dialect = {
"header" => true,
"delimiter" => ",",
"skipInitialSpace" => true,
"lineTerminator" => :auto,
"limitLines" => 0,
"quoteChar" => '"'
}.merge(dialect || {})

@csv_header = @dialect["header"]
@limit_lines = @dialect["limitLines"]
@csv_options = dialect_to_csv_options(@dialect)
@extension = parse_extension(source)
reset
Expand All @@ -46,7 +47,7 @@ def validate
validate_metadata(io)
parse_csv(io)
unless @col_counts.inject(:+).nil?
build_warnings(:title_row, :structure) if @col_counts.first < (@col_counts.inject(:+) / @col_counts.count)
build_warnings(:title_row, :structure) if @col_counts.first < (@col_counts.inject(:+) / @col_counts.size)
end
build_warnings(:check_options, :structure) if @expected_columns == 1
check_consistency
Expand Down Expand Up @@ -111,36 +112,38 @@ def parse_csv(io)
row = nil
loop do
current_line = current_line + 1
if @limit_lines > 0 && current_line >= @limit_lines
break
end
begin
wrapper.reset_line
row = csv.shift
@data << row
wrapper.finished
if row
if header? && current_line == 1
if current_line == 1 && header?
row = row.reject {|r| r.blank? }
validate_header(row)
@col_counts << row.count
@col_counts << row.size
else
build_formats(row, current_line)
@col_counts << row.reject {|r| r.blank? }.count
@expected_columns = row.count unless @expected_columns != 0
@col_counts << row.reject {|r| r.blank? }.size
@expected_columns = row.size unless @expected_columns != 0

build_errors(:blank_rows, :structure, current_line, nil, wrapper.line) if row.reject{ |c| c.nil? || c.empty? }.count == 0
build_errors(:blank_rows, :structure, current_line, nil, wrapper.line) if row.reject{ |c| c.nil? || c.empty? }.size == 0

if @schema
@schema.validate_row(row, current_line)
@errors += @schema.errors
@warnings += @schema.warnings
else
build_errors(:ragged_rows, :structure, current_line, nil, wrapper.line) if !row.empty? && row.count != @expected_columns
build_errors(:ragged_rows, :structure, current_line, nil, wrapper.line) if !row.empty? && row.size != @expected_columns
end

end
else
break
end
rescue CSV::MalformedCSVError => e
wrapper.finished
type = fetch_error(e)
if type == :stray_quote && !wrapper.line.match(csv.row_sep)
build_errors(:line_breaks, :structure)
Expand All @@ -150,7 +153,6 @@ def parse_csv(io)
end
end
rescue ArgumentError => ae
wrapper.finished
build_errors(:invalid_encoding, :structure, current_line, wrapper.line) unless reported_invalid_encoding
reported_invalid_encoding = true
end
Expand All @@ -175,7 +177,7 @@ def validate_header(header)
end

def header?
return @csv_header
@csv_header
end

def fetch_error(error)
Expand Down Expand Up @@ -203,10 +205,10 @@ def build_formats(row, line)

SIMPLE_FORMATS.each do |type, lambda|
begin
lambda.call(col, {})
@format = type
rescue => e
nil
if lambda.call(col)
@format = type
end
rescue ArgumentError, URI::InvalidURIError
end
end

Expand All @@ -217,13 +219,11 @@ def build_formats(row, line)
def check_consistency
percentages = []

formats = SIMPLE_FORMATS.map {|type, lambda| type }

formats.each do |type, regex|
@formats.count.times do |i|
SIMPLE_FORMATS.keys.each do |type|
@formats.each_with_index do |format,i|
percentages[i] ||= {}
unless @formats[i].nil?
percentages[i][type] = @formats[i].grep(/^#{type}$/).count.to_f / @formats[i].count.to_f
unless format.nil?
percentages[i][type] = format.count(type) / format.size.to_f
end
end
end
Expand Down Expand Up @@ -253,4 +253,4 @@ def parse_extension(source)
end

end
end
end
Loading