-
-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adds table transforms #45
Changes from 13 commits
2fa8f76
c1767c5
2e0b87c
7a57684
3f8db5b
bfaec10
90c3bae
eef71ec
cd42f1d
38f35a0
a033d1d
5a40475
0de742b
128cf97
be5dbff
e90dc4b
1a22791
a03bc72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
"""
    TabularItem(data, columns)

Item holding tabular data together with its column names.

# Fields
- `data::T`: the wrapped values. The `apply` methods for the table transforms
  iterate it in lockstep with `columns`.
- `columns`: the column names, paired positionally with the values of `data`.
  Left untyped (`Any`), so any iterable of names is accepted.
"""
struct TabularItem{T} <: Item
    data::T
    columns::Any
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
"""
    NormalizeRow(dict, cols)

Transform that standardizes the values of selected columns.

# Fields
- `dict`: indexed by column name; each entry destructures into `(mean, std)`
  for that column.
- `cols`: collection of the column names to normalize; columns not contained in
  it are passed through unchanged.
"""
struct NormalizeRow{D, C} <: Transform
    dict::D
    cols::C
end
|
||
"""
    FillMissing(dict, cols)

Transform that replaces `missing` values in selected columns.

# Fields
- `dict`: indexed by column name; each entry is the replacement value used when
  that column's value is `missing`.
- `cols`: collection of the column names to fill; columns not contained in it
  are passed through unchanged.
"""
struct FillMissing{D, C} <: Transform
    dict::D
    cols::C
end
|
||
"""
    Categorify(dict, cols)

Transform that replaces categorical values in selected columns by integer
indices.

# Fields
- `dict`: indexed by column name; each entry is the collection of known
  categories of that column.
- `cols`: collection of the column names to encode.

On construction, every category collection in `dict` that contains `missing`
is replaced by a copy without the `missing` entries and a warning is logged.
Note that this updates the `dict` passed by the caller in place.
"""
struct Categorify{D, C} <: Transform
    dict::D
    cols::C
    function Categorify{D, C}(dict::D, cols::C) where {D, C}
        for (col, vals) in dict
            # Nothing to clean up for columns without a `missing` category.
            any(ismissing, vals) || continue
            dict[col] = filter(!ismissing, vals)
            @warn "There is a missing value present for category '$col' which will be removed from Categorify dict"
        end
        return new{D, C}(dict, cols)
    end
end
|
||
# Convenience constructor: infers both type parameters from the arguments and
# forwards to the inner constructor.
function Categorify(dict::T, cols::S) where {T, S}
    return Categorify{T, S}(dict, cols)
end
|
||
"""
    apply(tfm::NormalizeRow, item::TabularItem; randstate=nothing)

Return a new `TabularItem` in which every value whose column is contained in
`tfm.cols` is replaced by `(value - mean) / std`, with `(mean, std)` looked up
in `tfm.dict`. All other values are kept as they are. The result's data is a
`NamedTuple` keyed by `item.columns`. `randstate` is accepted but not used.
"""
function apply(tfm::NormalizeRow, item::TabularItem; randstate=nothing)
    # Produce the `(name, value)` entry for a single column.
    function standardize(col, val)
        col in tfm.cols || return (col, val)
        colmean, colstd = tfm.dict[col]
        return (col, (val - colmean) / colstd)
    end

    entries = (standardize(col, val) for (col, val) in zip(item.columns, item.data))
    return TabularItem(NamedTuple(entries), item.columns)
end
|
||
"""
    apply(tfm::FillMissing, item::TabularItem; randstate=nothing)

Return a new `TabularItem` in which every `missing` value whose column is
contained in `tfm.cols` is replaced by the fill value stored for that column in
`tfm.dict`. All other values are kept as they are. The result's data is a
`NamedTuple` keyed by `item.columns`. `randstate` is accepted but not used.
"""
function apply(tfm::FillMissing, item::TabularItem; randstate=nothing)
    entries = (
        (col, (col in tfm.cols && ismissing(val)) ? tfm.dict[col] : val)
        for (col, val) in zip(item.columns, item.data)
    )
    return TabularItem(NamedTuple(entries), item.columns)
end
|
||
"""
    apply(tfm::Categorify, item::TabularItem; randstate=nothing)

Return a new `TabularItem` in which every value whose column is contained in
`tfm.cols` is replaced by an integer index:

- `missing` is encoded as `1`;
- any other value is encoded as its position in `tfm.dict[col]` plus one.

Values of all other columns are kept as they are. The result's data is a
`NamedTuple` keyed by `item.columns`. `randstate` is accepted but not used.

# Throws
- `ArgumentError` if a non-missing value is not among the categories stored in
  `tfm.dict` for its column.
"""
function apply(tfm::Categorify, item::TabularItem; randstate=nothing)
    # Produce the `(name, value)` entry for a single column.
    function encode(col, val)
        col in tfm.cols || return (col, val)
        # Index 1 is reserved for `missing`. The `Categorify` constructor strips
        # `missing` from the category lists, so the equality test below is never
        # evaluated against a `missing` category.
        ismissing(val) && return (col, 1)
        # Predicate form avoids allocating the temporary array of `val .== ...`.
        idx = findfirst(==(val), tfm.dict[col])
        if idx === nothing
            # Previously this surfaced as an opaque `MethodError` from `nothing + 1`.
            throw(ArgumentError(
                "value $(repr(val)) in column '$col' is not a known category of the Categorify dict",
            ))
        end
        return (col, idx + 1)
    end

    entries = (encode(col, val) for (col, val) in zip(item.columns, item.data))
    return TabularItem(NamedTuple(entries), item.columns)
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And all the other transforms too