-
Notifications
You must be signed in to change notification settings - Fork 55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Change the way grouped transforms work #101
Changes from 1 commit
1965c7b
15df9ff
9c95239
1894c86
e48c355
2226c96
e08207b
bc72eb9
9ee9d22
b39ab11
105d846
0298c3d
e87647d
3ae01a0
0db98f1
cf79e39
2dea7c4
9a4d5c1
26cac9c
28a775b
6839b9b
dfc4a2f
df6503a
bdf4a79
6aea278
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -405,9 +405,29 @@ end | |
|
||
function _transform!(t::AbstractVector, first::AbstractVector, start::Int, | ||
g::GroupedDataFrame, v::Function, starts::Vector, ends::Vector) | ||
@inline function fill_column_vec!(t::AbstractVector, out, startpoint::Int, endpoint::Int, len::Int) | ||
if !(out isa AbstractVector) | ||
throw(ArgumentError("Return value must be an `AbstractVector` for all groups or" * | ||
"for none of them")) | ||
elseif length(out) != len | ||
throw(ArgumentError("If a function returns a vector, the result " * | ||
"must have the same length as the groups it operates on")) | ||
end | ||
elout = eltype(out) | ||
T = eltype(t) | ||
newtype = promote_type(elout, T) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe you can make things slightly faster by moving this call after Also There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wow, that's really smart that Julia allows for that. But alas, there isn't any speed gain from this:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, too bad. |
||
if elout <: T || newtype <: T | ||
t[startpoint:endpoint] = out | ||
return nothing | ||
else | ||
return newtype | ||
end | ||
return nothing | ||
end | ||
|
||
# handle the first case | ||
newtype = fill_column_vec!(t, first, starts[start], ends[start], size(g[start], 1)) | ||
@assert newtype === nothing | ||
newtype_first = fill_column_vec!(t, first, starts[start], ends[start], size(g[start], 1)) | ||
#@assert newtype_first === nothing | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't expect this check to be costly. Is it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry. No, it's not. |
||
@inbounds for i in (start+1):length(g) | ||
out = v(g[i]) | ||
newtype = fill_column_vec!(t, out, starts[i], ends[i], size(g[i], 1)) | ||
|
@@ -422,9 +442,24 @@ end | |
|
||
function _transform!(t::AbstractVector, first::Any, start::Int, | ||
g::GroupedDataFrame, v::Function, starts::Vector, ends::Vector) | ||
@inline function fill_column_any!(t::AbstractVector, out, startpoint::Int, endpoint::Int) | ||
if out isa AbstractVector | ||
throw(ArgumentError("Return value must be an `AbstractVector` for all groups or" * | ||
"for none of them")) | ||
end | ||
typout = typeof(out) | ||
T = eltype(t) | ||
newtype = promote_type(typout, T) | ||
if typout <: T || newtype <: T | ||
t[startpoint:endpoint] .= Ref(out) | ||
return nothing | ||
else | ||
return newtype | ||
end | ||
end | ||
# handle the first case | ||
newtype = fill_column_any!(t, first, starts[start], ends[start]) | ||
@assert newtype === nothing | ||
newtype_first = fill_column_any!(t, first, starts[start], ends[start]) | ||
#@assert newtype_first === nothing | ||
@inbounds for i in (start+1):length(g) | ||
out = v(g[i]) | ||
newtype = fill_column_any!(t, out, starts[i], ends[i]) | ||
|
@@ -435,42 +470,8 @@ function _transform!(t::AbstractVector, first::Any, start::Int, | |
end | ||
end | ||
return t | ||
end | ||
|
||
function fill_column_vec!(t::AbstractVector, out, startpoint::Int, endpoint::Int, len::Int) | ||
if !(out isa AbstractVector) | ||
throw(ArgumentError("Return value must be an `AbstractVector` for all groups or" * | ||
"for none of them")) | ||
elseif length(out) != len | ||
throw(ArgumentError("If a function returns a vector, the result " * | ||
"must have the same length as the groups it operates on")) | ||
end | ||
elout = eltype(out) | ||
T = eltype(t) | ||
newtype = promote_type(elout, T) | ||
if elout <: T || newtype <: T | ||
t[startpoint:endpoint] = out | ||
return nothing | ||
else | ||
return newtype | ||
end | ||
end | ||
|
||
function fill_column_any!(t::AbstractVector, out, startpoint::Int, endpoint::Int) | ||
if out isa AbstractVector | ||
throw(ArgumentError("Return value must be an `AbstractVector` for all groups or" * | ||
"for none of them")) | ||
end | ||
typout = typeof(out) | ||
T = eltype(t) | ||
newtype = promote_type(typout, T) | ||
if typout <: T || newtype <: T | ||
t[startpoint:endpoint] .= Ref(out) | ||
return nothing | ||
else | ||
return newtype | ||
end | ||
end | ||
end | ||
|
||
function transform_helper(x, args...) | ||
quote | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What I meant when I said you could move the code inside the function is that if you use `@inline`, you can just drop the function barrier and put the code directly in the parent function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should probably still be a function, since we call it twice per `_transform!` function: once for the first case and once for the rest. We need to call it twice because we have to compute `first` in `transform(::GroupedDataFrame,...)`, so not treating the first case separately would require calculating `first` twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. Should be OK as-is then.