Skip to content

Commit

Permalink
CLN: remove some build warnings
Browse files Browse the repository at this point in the history
CLN: from algos_rank_helper.pxi.in, algos_groupby_helper.pxi.in
CLN: hashtable warns
CLN: parser warnings

closes pandas-dev#15190

Author: Jeff Reback <[email protected]>

Closes pandas-dev#15259 from jreback/warn and squashes the following commits:

12cc061 [Jeff Reback] CLN: remove some warnings from algos_rank_helper.pxi.in CLN: and algos_groupby_helper.pxi.in CLN: hashtable warns CLN: parser warnings
  • Loading branch information
jreback committed Jan 30, 2017
1 parent 7bb4980 commit b1c3c48
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 59 deletions.
47 changes: 18 additions & 29 deletions pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -716,11 +716,10 @@ cdef class TextReader:
# header is now a list of lists, so field_count should use header[0]

cdef:
size_t i, start, data_line, field_count, passed_count, hr, unnamed_count # noqa
Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
char *word
object name
int status
Py_ssize_t size
int status, hr, data_line
char *errors = "strict"
cdef StringPath path = _string_path(self.c_encoding)

Expand Down Expand Up @@ -1416,8 +1415,7 @@ cdef _string_box_factorize(parser_t *parser, int col,
bint na_filter, kh_str_t *na_hashset):
cdef:
int error, na_count = 0
Py_ssize_t i
size_t lines
Py_ssize_t i, lines
coliter_t it
const char *word = NULL
ndarray[object] result
Expand Down Expand Up @@ -1470,8 +1468,7 @@ cdef _string_box_utf8(parser_t *parser, int col,
bint na_filter, kh_str_t *na_hashset):
cdef:
int error, na_count = 0
Py_ssize_t i
size_t lines
Py_ssize_t i, lines
coliter_t it
const char *word = NULL
ndarray[object] result
Expand Down Expand Up @@ -1525,8 +1522,7 @@ cdef _string_box_decode(parser_t *parser, int col,
char *encoding):
cdef:
int error, na_count = 0
Py_ssize_t i, size
size_t lines
Py_ssize_t i, size, lines
coliter_t it
const char *word = NULL
ndarray[object] result
Expand Down Expand Up @@ -1586,8 +1582,7 @@ cdef _categorical_convert(parser_t *parser, int col,
"Convert column data into codes, categories"
cdef:
int error, na_count = 0
Py_ssize_t i, size
size_t lines
Py_ssize_t i, size, lines
coliter_t it
const char *word = NULL

Expand Down Expand Up @@ -1691,7 +1686,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
bint na_filter, kh_str_t *na_hashset, object na_flist):
cdef:
int error, na_count = 0
size_t i, lines
Py_ssize_t i, lines
coliter_t it
const char *word = NULL
char *p_end
Expand Down Expand Up @@ -1738,8 +1733,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
int *na_count) nogil:
cdef:
int error,
size_t i
size_t lines = line_end - line_start
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
char *p_end
Expand Down Expand Up @@ -1801,7 +1795,7 @@ cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end,
bint na_filter, kh_str_t *na_hashset):
cdef:
int error
size_t i, lines
Py_ssize_t i, lines
coliter_t it
uint64_t *data
ndarray result
Expand Down Expand Up @@ -1837,8 +1831,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start,
uint64_t *data, uint_state *state) nogil:
cdef:
int error
size_t i
size_t lines = line_end - line_start
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
khiter_t k
Expand Down Expand Up @@ -1873,7 +1866,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
bint na_filter, kh_str_t *na_hashset):
cdef:
int error, na_count = 0
size_t i, lines
Py_ssize_t i, lines
coliter_t it
int64_t *data
ndarray result
Expand Down Expand Up @@ -1902,8 +1895,7 @@ cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start,
int64_t *data, int *na_count) nogil:
cdef:
int error
size_t i
size_t lines = line_end - line_start
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
khiter_t k
Expand Down Expand Up @@ -1939,7 +1931,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
bint na_filter, kh_str_t *na_hashset):
cdef:
int na_count
size_t lines = line_end - line_start
Py_ssize_t lines = line_end - line_start
uint8_t *data
cnp.ndarray[cnp.uint8_t, ndim=1] result

Expand All @@ -1963,8 +1955,7 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start,
uint8_t *data, int *na_count) nogil:
cdef:
int error
size_t lines = line_end - line_start
size_t i
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
khiter_t k
Expand Down Expand Up @@ -2004,7 +1995,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
const kh_str_t *false_hashset):
cdef:
int error, na_count = 0
size_t i, lines
Py_ssize_t i, lines
coliter_t it
const char *word = NULL
uint8_t *data
Expand Down Expand Up @@ -2033,8 +2024,7 @@ cdef inline int _try_bool_flex_nogil(parser_t *parser, int col, int line_start,
int *na_count) nogil:
cdef:
int error = 0
size_t i
size_t lines = line_end - line_start
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
khiter_t k
Expand Down Expand Up @@ -2249,8 +2239,7 @@ cdef _apply_converter(object f, parser_t *parser, int col,
char* c_encoding):
cdef:
int error
Py_ssize_t i
size_t lines
Py_ssize_t i, lines
coliter_t it
const char *word = NULL
char *errors = "strict"
Expand Down Expand Up @@ -2341,7 +2330,7 @@ def _to_structured_array(dict columns, object names, object usecols):
cdef _fill_structured_column(char *dst, char* src, int elsize,
int stride, int length, bint incref):
cdef:
size_t i
Py_ssize_t i

if incref:
util.transfer_object_column(dst, src, stride, length)
Expand Down
53 changes: 45 additions & 8 deletions pandas/src/algos_groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,11 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
nobs[lab, j] += 1
resx[lab, j] = val

Expand Down Expand Up @@ -407,7 +411,11 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
nobs[lab, j] += 1
if nobs[lab, j] == rank:
resx[lab, j] = val
Expand Down Expand Up @@ -478,7 +486,11 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
nobs[lab, j] += 1
if val > maxx[lab, j]:
maxx[lab, j] = val
Expand All @@ -492,7 +504,11 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, 0]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
nobs[lab, 0] += 1
if val > maxx[lab, 0]:
maxx[lab, 0] = val
Expand Down Expand Up @@ -541,8 +557,11 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:

{{endif}}
nobs[lab, j] += 1
if val < minx[lab, j]:
minx[lab, j] = val
Expand All @@ -556,7 +575,11 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, 0]

# not nan
{{if name == 'int64'}}
if val != {{nan_val}}:
{{else}}
if val == val and val != {{nan_val}}:
{{endif}}
nobs[lab, 0] += 1
if val < minx[lab, 0]:
minx[lab, 0] = val
Expand Down Expand Up @@ -596,14 +619,19 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
continue
for j in range(K):
val = values[i, j]

# val = nan
{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
else:
{{else}}
if val == val:
{{endif}}
if val < accum[lab, j]:
min_val = val
accum[lab, j] = min_val
out[i, j] = accum[lab, j]
# val = nan
elif is_datetimelike:
out[i, j] = {{nan_val}}


@cython.boundscheck(False)
Expand Down Expand Up @@ -633,14 +661,18 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
continue
for j in range(K):
val = values[i, j]

{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
else:
{{else}}
if val == val:
{{endif}}
if val > accum[lab, j]:
max_val = val
accum[lab, j] = max_val
out[i, j] = accum[lab, j]
# val = nan
elif is_datetimelike:
out[i, j] = {{nan_val}}

{{endfor}}

Expand Down Expand Up @@ -738,7 +770,12 @@ def group_cumsum(numeric[:, :] out,
continue
for j in range(K):
val = values[i, j]
if val == val:

if numeric == float32_t or numeric == float64_t:
if val == val:
accum[lab, j] += val
out[i, j] = accum[lab, j]
else:
accum[lab, j] += val
out[i, j] = accum[lab, j]

Expand Down
8 changes: 1 addition & 7 deletions pandas/src/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

count += 1.0

{{if dtype == 'float64'}}
if i == n - 1 or sorted_data[i + 1] != val:
{{else}}
if i == n - 1 or fabs(sorted_data[i + 1] - val) > 0:
{{endif}}
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand Down Expand Up @@ -345,10 +341,8 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',

{{if dtype == 'object'}}
if j == k - 1 or are_diff(values[i, j + 1], val):
{{elif dtype == 'float64'}}
if j == k - 1 or values[i, j + 1] != val:
{{else}}
if j == k - 1 or fabs(values[i, j + 1] - val) > FP_ERR:
if j == k - 1 or values[i, j + 1] != val:
{{endif}}
if tiebreak == TIEBREAK_AVERAGE:
for z in range(j - dups + 1, j + 1):
Expand Down
2 changes: 2 additions & 0 deletions pandas/src/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -386,9 +386,11 @@ cdef class {{name}}HashTable(HashTable):
val = values[i]

# specific for groupby
{{if dtype != 'uint64'}}
if val < 0:
labels[i] = -1
continue
{{endif}}

k = kh_get_{{dtype}}(self.table, val)
if k != self.table.n_buckets:
Expand Down
13 changes: 9 additions & 4 deletions pandas/src/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,

for i in range(n):
val = values[i]

{{if dtype == 'float64'}}
if val == val or not dropna:
{{else}}
if True:
{{endif}}
k = kh_get_{{ttype}}(table, val)
if k != table.n_buckets:
table.vals[k] += 1
Expand All @@ -85,7 +90,7 @@ cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna):
int64_t[:] result_counts
{{endif}}

int k
Py_ssize_t k

table = kh_init_{{ttype}}()
{{if dtype == 'object'}}
Expand Down Expand Up @@ -133,11 +138,11 @@ def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'):
{{endif}}
cdef:
int ret = 0, k
int ret = 0
{{if dtype != 'object'}}
{{dtype}}_t value
{{endif}}
Py_ssize_t i, n = len(values)
Py_ssize_t k, i, n = len(values)
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')

Expand Down Expand Up @@ -230,7 +235,7 @@ def mode_{{dtype}}({{ctype}}[:] values):
cdef:
int count, max_count = 2
int j = -1 # so you can do +=
int k
Py_ssize_t k
kh_{{table_type}}_t *table
ndarray[{{ctype}}] modes

Expand Down
2 changes: 1 addition & 1 deletion pandas/src/parser/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,

retval = src->memmap + src->position;

if (src->position + nbytes > src->last_pos) {
if (src->position + (off_t)nbytes > src->last_pos) {
// fewer than nbytes remaining
*bytes_read = src->last_pos - src->position;
} else {
Expand Down
Loading

0 comments on commit b1c3c48

Please sign in to comment.