From 951e18d3504e7d44e364eb2d9b54a373345ee454 Mon Sep 17 00:00:00 2001 From: xiaodaigh Date: Wed, 13 May 2020 11:13:43 +1000 Subject: [PATCH 1/2] added ability to read missing values; test added --- src/reader.jl | 7 ++++++- .../parquet-testdata/synthetic_data.parquet | Bin 0 -> 3996 bytes test/test_decode.jl | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 test/parquet-compatibility/parquet-testdata/synthetic_data.parquet diff --git a/src/reader.jl b/src/reader.jl index 5867669..518d054 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -258,7 +258,12 @@ function read_levels_and_values(io::IO, encs::Tuple, ctype::Int32, num_values::I #@debug("before reading values bytesavailable in page: $(bytesavailable(io))") # read values - vals = read_values(io, enc, ctype, num_values) + # if there are missing values in the data then + # where defn_levels's elements == 1 are present and only + # sum(defn_levels) values can be read. + # because defn_levels == 0 are where the missing vlaues are + nmissing = sum(==(0), defn_levels) + vals = read_values(io, enc, ctype, num_values - nmissing) vals, defn_levels, repn_levels end diff --git a/test/parquet-compatibility/parquet-testdata/synthetic_data.parquet b/test/parquet-compatibility/parquet-testdata/synthetic_data.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0ebcb902069e742287b6efbab5eb158b3e8c7c86 GIT binary patch literal 3996 zcmb_fOKcle6dl`%YtmG$s*Z7r7m)>4fVB7%r*S0|p1&E_aT}+xJ?0bU7kk{9vGFIi zQ!24w!2&1?eku|hR24x50;(cH$yF8D(bP zyZ60&&%NiqJI)c5_R<6Ny|kF5hv|Xc6yujVq5tre2)hdExGzUq1E2>Yrcjxb&o|SDZhk-i)0sb3e;jz`P1~8bSDhE0{`A5PpZ8&? zv%_KQ?7<9emaa2&dAH8cWm`{V{w{_tZF673^lvtEP2&*RQ96^PhX_TtLqTM*mzFd zSU!`x$F>KE>~daogib0j8EJR6W!nzjOb2T(sD2qCThFWQ_Daw3F66zS5D%ClWt8=jYxJ%MtxvUlN-i4#FeN(OzU0`}q zxS+9P13fT0!VNfY-+TxrzNOOVyfO z*09vG-y7KPbphEO?>$OiV87pWf^rO_boy}ipx!JU-w)r0uc<;5Av6a!z78QYfkFio zL#Tkl1tJTfX1BhHg|NraCz=s`4o_l*=oti?LI}Z|7=bt01Wy+3P?OCDjwM?m8V6;S$S4D-9n*~M<6#47|Uege> zty(gSWWk><3fdgVP}7(%3t?uGi9|idT+!#TU~V4UFnlS&3a_RWDV|xuezN|W%&W0_ zejfJ5171GftTEw)%4IXPd@R^tvt9+So1h~MrH|UXaA);fnOoPj&mhtrKEsKL+{!G;d0tf%KAv7$H#A8s8v6a7x9qP{8 zlrmkMZ^YZnDXG&Xs5_|Xb*^v?#)Do#z6M{t`hJ{wc~vR#zI5kYY~qafCGN{-QwFF1 zC`Tn8Vi(%i#8hK*sTIO$Ki>Xd?dy5U0B_+$Nl-IzCW39uC*-ZhWfN^Smk5wEp@jl4 zZu5SD@F}X9w%}8ic^S@am}w?UQ7_yw&Ux1v_2qMk6)qcW5E~`bm@O?dX4{D%?okLW zz#Sp|s)VO;{D5W#7xXiPVk@|5!XO%hCJ9%kACd_|sS?%`73*si1wZsS6prFn3xiN7 zP5L3}l*FFC4f^SvZeQ<@z-Mb&%mIvKq~kBr1qJtP{Yo4#^sp8~nSrz4jIbSls73r? z5Qcp`io^|l7$K|*!I@xiQ-|OSxc|=>adkk!ny8+X5P`n#AMY;UKF{K}eiab@CLlQX zEkklU<$zC9^}dEw-zQWmqsI4X)Vm)(T>Iuz=*axc;iDsW!B6t;KkAF^6m Date: Wed, 13 May 2020 14:12:46 +1000 Subject: [PATCH 2/2] moved test data synthetic data as required. --- .../synthetic_data.parquet | Bin test/test_decode.jl | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename test/{parquet-compatibility/parquet-testdata => missingvalues}/synthetic_data.parquet (100%) diff --git a/test/parquet-compatibility/parquet-testdata/synthetic_data.parquet b/test/missingvalues/synthetic_data.parquet similarity index 100% rename from test/parquet-compatibility/parquet-testdata/synthetic_data.parquet rename to test/missingvalues/synthetic_data.parquet diff --git a/test/test_decode.jl b/test/test_decode.jl index bda7089..1a34895 100644 --- a/test/test_decode.jl +++ b/test/test_decode.jl @@ -55,4 +55,4 @@ end test_decode_all_pages() -test_decode("parquet-testdata/synthetic_data.parquet") +test_decode("synthetic_data.parquet", "missingvalues")