From ad37a4c816870033946d471a1e8cbc4a26a14746 Mon Sep 17 00:00:00 2001 From: David Arango Date: Wed, 16 May 2018 17:22:35 +0200 Subject: [PATCH 1/5] Fix inconsistent DateTime conversion (#65) * Always return a Time object for DateTime cells * Fix specs for DateTime parsing --- lib/creek/styles/converter.rb | 29 ++++++++++--- spec/fixtures/sample_dates.xlsx | Bin 0 -> 9358 bytes spec/fixtures/sheets/sample_dates.xlsx | Bin 0 -> 6495 bytes .../sheets/single_data_programme.xlsx | Bin 0 -> 6948 bytes spec/styles/converter_spec.rb | 9 +++- spec/test_spec.rb | 40 ++++++++++++++---- 6 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 spec/fixtures/sample_dates.xlsx create mode 100644 spec/fixtures/sheets/sample_dates.xlsx create mode 100644 spec/fixtures/sheets/single_data_programme.xlsx diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb index 786052c..459d3fc 100644 --- a/lib/creek/styles/converter.rb +++ b/lib/creek/styles/converter.rb @@ -60,8 +60,10 @@ def self.call(value, type, style, options = {}) value.to_i when :float, :percentage value.to_f - when :date, :time, :date_time + when :date convert_date(value, options) + when :time, :date_time + convert_datetime(value, options) when :bignum convert_bignum(value) @@ -71,12 +73,17 @@ def self.call(value, type, style, options = {}) end end - # the trickiest. note that all these formats can vary on - # whether they actually contain a date, time, or datetime. def self.convert_date(value, options) - value = value.to_f + date = base_date(options) + value.to_i + yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-') - Time.at(((value - DATE_SYSTEM_1900) * 86400).round) + ::Date.new(yyyy.to_i, mm.to_i, dd.to_i) + end + + def self.convert_datetime(value, options) + date = base_date(options) + value.to_f.round(6) + + round_datetime(date.strftime('%Y-%m-%d %H:%M:%S.%N')) end def self.convert_bignum(value) @@ -86,6 +93,18 @@ def self.convert_bignum(value) value.to_f end end + + private + + def self.base_date(options) + options.fetch(:base_date, Date.new(1899, 12, 30)) + end + + def self.round_datetime(datetime_string) + /(?\d+)-(?\d+)-(?
\d+) (?\d+):(?\d+):(?\d+.\d+)/ =~ datetime_string + + ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) + end end end end diff --git a/spec/fixtures/sample_dates.xlsx b/spec/fixtures/sample_dates.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e4edea546d1589ab1dafb3713dbb09831bfe4f9c GIT binary patch literal 9358 zcmeHt1y@|jx_09Zjk~+M2MF#i!Gk7PXxz1N2^QQn5FCPr5Txa}*QURCeAS5-Yvy|%Sf5fBLg$N*FT06+^+lZMBA4hH}vAp!vS091Gr zS&*}bt+U5-Jzp1FcQY=ZS57p=i0~|h0C?E^|F-|&9jH!K*MRT+E6%C(D z^TNQG%BfJ`2uMD%G}mBWoTZ;kE57P;+>{3;fWOwuFgBfKRXN5JBd@m?#pUD7^&;yx z+uY&8{r2q8k5rh>Hb<19CAofB+OMe!wJ}teHe;ngN_2(Y)4{# zb}xJeX@jDBY#A z<^A;#u-@!V!Pm(=#|(qi<0Aq<>o2@)*5{!=hmEMgXomsg<#RV%CwFeH-=6>F<9{#- z|Frb#6m?Aq4`$@C(tXs>)!bSVP)5~TTB(CpFDOKL6{q=i2?P0B7c&J=k1Py9F}O46 zeq?3s$?N?g+RJYO)p2+P;`A-vHIW&&uAV4tOm684uGJfTxL$J?bC+3)D!wdUor!E! zZ52hT!|U`4GpBM5zzMErWN7$h6p=&{S)s-Q>PBCzu501uWOR>eBI{d)3-{kl?HPZ$kSHFPozj-#&l0y9y6CrPMeY6cXxBOhQSAp@Yo z`8aX^iznV7H%BWF$niJx^>5C=!E6{z%YSyO)=^jO;{kSJ{EXu9&ha9|UGm_jJJdhG zK^<;lU16f-4?15bXZv7gIIqf$0E+ZI9vk$!62$(7fPc}$UJ;9j=!frgERF;kgN`Af zS{>C;ffbSP@ecP7s*W(Qv)%FAB~ykt@(DzCu2~pKqe|Y=$(^@PM+!KM^LX+(xKC1! zz%cW|bi`OfbB}G$yg5?XC`aVm`e%&K~iF)?& z{)M{lxMNSrE#@{wl`2)GphQ5V2TjzFB6Topiu-nPR)_xahKJ1E2y55M8OP`I;}=`Q zgLCV!lm0to>`WRPpTXe5iUa`Qz-GW8^JlP>>Y0I-c?bdvnjS4~Bwkb6(cl)5O*$wY z7}S{DiL@us8sLjJ=HcmGbRCNU#pWdnx!{8-cURq|+?a`9=@&CwR?o6Q&f_f@2o~D) z4lp0@T?ylP8x!8;rWwazf+G5fXA-HI$X`)WHwDQ(Ep&>+*xeVv~qR$jmUU}V3*p;Gvw(dlrQdN~k0KU>_frho9 zYBIAM#detO(HSf}H`?acx6WT}9_F@# zs6GVLN^D{}3iIE@1YfwFB4MsJGV`ElY~MMU!mnR1Kyxt_!umO|UzCI+LhJ-k2THd1 zu2W6NB1>$%Nae$bQ6pk>YmGKzJ2CS4Az?jIg^u%=sEqdX&}VlOHjjuwKOMg}5FbZ9 z8XlJ}E}+4za{vD1{q52^G<~kSA_5HV%DTg*>iTjX4aMmd@UWwu z$=UG3+}Oz{R8(FaYMjtvIoShwGl~m&p|aZ7A@yJmO*=5w1TCj|adriIlVco5zGPF` zn#2hL-B4v%^1a>)`2H9u;pe{Pufi{COP3t*ecg4tOZ?^{qxH~`+h&Yp#?emvQ1|+2naoN=e;om2saQ&)%-$j3O;=qgBS!D~o>2^T zCy1`d04o*I3PEW?n@T~C*wriHKdQ@Q8)JoI9Ei|(=e@ztSyIwfs^%SUdG2FY(5_c>paM=AW^3ID*_WwJ2=nR_0LF+d2KR#ZzpNq zCoyqQCKjpS+!p#b9Ot8Febr6v&`!T;vY1)7i#@CudrIUqGDe`^sH#icgmUsmmPPRy zWcG+@j8Ch@8*Cxn=9iGpnDttE+R{oZDg!~L0#nmboBwXS7`Hk5KEru*&A=+3px$3)H^O&N7i+uH+iuGW^&o50U{=nO9_WV?g6*>TvErZx&5u1|sqj=(*4Wb|Dn_4d$7 zn!~{>0$WH&PJPWHdn1=@yY%ejB7`yaE#rFcRNtg>O~6|frUJ|235&!Iyq(lC9iL*) zB=Z;NkGSqu6CCO};?~Vzp~J+HTO!dBL_svtN0I3#P&opeK;zJMUjiG^cs#ykoE_04 zwVH3OW;qNq9iK5<6Wb)t^~e(Fcoc}$_Egh*v;|I1m8u@WQvAswgz8QsS47HQ>#2$7 zE|C#B>vt(d?fQ?kh690`_q3Me%Z_m80h$@qjc1V3dzxpbQHbfd9*BO5%8p<@&hNNB zyreSzj0pMpwE{y!Y=QhLjin=t=xUlbhj;^2?+?MqI5F%ha&G8R>MFHYNK{GB-w$6? zZLNwU&TaTl!qN3|KC-@|A!$N~^o31C1Qc+-^MUJ-7pZz)Yonz0TAsSRs7J{M%@2&d zdN7v5>yFfaN1?eo_%=bNG)Xp|;FOqeKVo2YAs!)_|?HH4#C)zyFYdJ?N# zkK^kofu2UWA&{-=+mCOrb`9v&H%)07@VK1i0IE>dLV_T)l(0h897y?7JcH@aQ4H@g zsZmo1UoO9)iO~=?c0t6o46)>$_99sdTlNbjk+v|5_n=UqP^gA}DpmxN5Yd}O;YKAh z#nbR`DEML1W7DhuKIfbgHYb}0++9y1>Ota4V&97j53$-Bvh1@gMWaW>j7tuML_?JD z6@VlI&ve|**aG@{eN5}FN8{eM9$e8>e+J$Ud~FL4k-TS2Ot$1=(Cf}78DDCstpPW` z9&{bNm1lIIzHgKdJj*qv{zOWK{O&9|ED1*jr|fAc8uKi8DA8X>xT8j>uX-aew}6WV zZhU5(21hBxT`B|xKdGLPiN;dZ3th<$9d!f|g8M!&f{)Cb=54xx6$6fPu7k+}aym%Knu z!D0n}>t*87y(YN0Q5))dRQ2#hpyGOKRU!=K3iJEUhUSBg_k+R5j+Y_p+1K`UpW3v* z7>tu^-|kx!AQvp&6Wx|ugjRhKc&iPg5qO`V`m4ltj;1Ua|NXe%#jh(8>gDI;4kXT8m6KuY(T$d_0qOpwa0oE8Ieu~gx2EnC|p zi&=}-c%$8(-FOMerYE(HyAC^~6f)CIMxC#KzR_Xz1_xY`T8e%bXvg8NKo6G1Q)nuQr-DC{0HWe}kv`mHV;@ zm(n<`6Q#EIzTuSuX%$(>;&jRgGgfRhTh98C!@y}v_(#p!ME--mjm;kK>4+&PH=Vw~ zrdqwsa>yacIQL9q?of|W;yK@=cTd|V$Z%*S7DeI5x{@tqiFDx*^-p*lhdrsL0biz{ zNCziQETI0VB^!WD);xqw8!ik-mb->|Z-owVk42gwr4=k8GZh*nzZuW~rRlPuGJ^Y+ zlM5rnyY2+Fn3lf@t0a6|de3-ycuik^!)%PtV$R|p(~48dkj2nFEuYSI!fBm2a)JG} z8}afXx9{7FjYJ62N!MvciN)TB61!$>h0qRc(7oMch3abHwPi~eOq8eXA5E7`)(-2{MOj@;MhMx1%iC9F_lu6k;Du2; z0iIP?hu6s!n3kfAFQ@t=s;dcUq(McQw$xTEg)IkZPh}+EeTmRV zm`|Qcv+nlrtZwMDA6=RPQz1ISHOgqWxi2!wPvk7qYzfj@6dw|g&`Ku}8=@~lWyKQm z5Wc_3<4n(szylVa7Xg0CS2WZx__3#-0VItv2?K7NzQL32?*+@SCnEMNmT#DsS#rD( zlV8VT*6s?zT@Ds(TDfTXbY;SK2x2aG(*(v#;2g(BHK0Dmp$uKeA|CG)t#bRS)%uXc z?R7&87@y?am;-&o8gK`5_1;di^?J*BR@C#&&)?c9Z3eYS^%T134P)-?@Z6I^DQhD1 zWgxxnHV8P*w@Om7U6o6$YB|=Wcg<0)+@Zyxcsad-(yEu4ASq5>owgO8?|(DRek-zYLNF$ z*wSo>44XUQ-r?8!b#;S6LwNn7;c)C9Lt59TLws>$Eq&My$zC;r8G>)SXIAO3hFQH? zhb%hY@3cU)Twk-in{u~K7mH9T99exMVi>Z|YVNlh@UfMmldubC5P5FY;nYBCiX0U7 zm79sTi0{S2bXRZ2!A3+dYzco4U>gY%4y9nhD zFt~2^v8VBN-{*A$j_y}78UxJ_aT+RY`VO}ihe*}82sIfLrgM*-{2jDV?Lj*VgAkhMmEm4qCHdKOrg9flYIk8cGqg8!Gr)C6`^u!8WLr;iE-Ak z;(D-f#kEk0u&PQs99^t~9hd0yi&q`783Kuwac=S$g`=wJiWKHcy|o7M3d#{`zAwB{ zPmi9cVCbrR-#ulOIg5U1FgJ3|s-w#vpXS&$q5Ujy*7j(royw7I3-H|ZC|O(-kvt$F zZ_%Ywpxzsv$xPE1-6f`3;M#=>yiMFd_OeiPrAK2ZU>-honij1Ogp8-oHj{R0@&@eK zX;njccGGY(4!}5L<3U47sW@nTi@`6+_}Jd?!AU(_hRred91rKEtsAWzL2H(NUnJ=OYK17FS4b=v3jY!>S@F4P6BT*ZUMw`{YS1 zq)dq-k&$<~Q2wzWteTrcZEqc81x(O=X)2*s+WbimY(t3M=<(|Bov2f|jByZ3;B2_d zt5G-lD+)lZ?9qyF80us!I}*J9J;gOMry4ISMmW@g{Z?+@PXV^0`%`vfs?qVl6`5FP zDWo)m7q%SD4BR zoO3GRfb^KFaj)KttaCU&R=%3nE?>DEZBssdoM`L(^vY8^6pwI*qoc>_>>%KIIRy)A zj&BvHcY*Md-3fAyn${GQD)=ra#-Pf{zjOR$YQ(n#2dxl*k1D2t zEv7#vHzxUe@;ByWS)zWdN&ESSz=z&5!ukXvQvNeb#;6KPztx*be1Y+=_FScWiIXXQz!nknCd+~(R zMz`B(#XV%yW~NDFs#4T>OI~-)(Y__NTMIY+WkjlKWupeI+S#A^-Bv@)|eXbpW{7yNWEti57-6H<1XK;2z4F1*I(x<;e* z@(FEimBQEFvbSFyW_4%%w6eL-m1O-7Q67ef6tgEYEy^z%!1N5)g7X}d*}>+o*O6wb z>a8z^+Fp8^?Hh#(=Np@Q^W|$mLOh1H>gJO#FtDrp(ZmC$Mc0DTdK!eP$Q@*Mr^Gge z#}+4@Is^I;1lz?C7hfGp4>D)u%T`Y^Cd^%w=WU0nXBF8be`GHicxlm4hfuv`g}D&c2Ot8J>9GKs{mmCPQ|8H=m*&i?er z#7A=7dU4lG-PN)bFUzfw>bKnVld0H7G;%)Nj__3` zpiKi9@xZ%tCYJRz>nu@Qy0lx78ridA)w*Ph$-g3hvic^N8W8tk+ON!lBLqtzP6S~) zt4%WPQHK1i;OCj8l)RABn9zWv&$DZl4h%`{w=L08Q?lIo4LGO@4?lRtqiVx!UT2qx zlNCc^RNc?}uKQ*{ty~Bg!EQ`c<8{rS^~>s-e=eVy@$c`(T*5+zznkNEQ%j~$7(j?% zqqu(ti8aX0_P;2B!Q+2gu1=4}Cam&6e&g_GNypf5S)S-VDNflca{3AAJ{7m9NU>0~i9$UDs9x^h+mSR0i_9Ogvg z92*8%YWJy$*-r@DLW03ik|B%H#o+<;d*hG58j>Mkr3eFDNPU`lPtJ02 zE^&XEQ*x?7sSPOeZdf}$os3Df3uDZpP}(Yt(>S&s#D z!7pTZqVJ@Nv%<|5#y<4(AWu9WiRxItf>gFQ1R8`Co7-0mX@y;=Q03ppc9cIuRM|%3 zfrBG#Pl`r!83nZ+n@%QY;rB_9(IalgNO51}5g4vjD0$!KOdwxeNQws3G;nUGEL8JFzeIN zwY?TjR0V1)SNvU1cqDmqpQ4*)td}+T>v(Dccl%FRnd4u$g@fmW`NjYI<;TB|>EHc7 zdM{aN}c^`E5weGzJ@BEe1<06>TRLSPlR JYuexb{XdS=N^$@I literal 0 HcmV?d00001 diff --git a/spec/fixtures/sheets/sample_dates.xlsx b/spec/fixtures/sheets/sample_dates.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0a5d92fb3b9cc6ff513f9189804cfba274ae3680 GIT binary patch literal 6495 zcmaJ`1zc2Lw;dX3kd#hAKtMVqrMo1h2M~rF8tFzHkdg*zP`bNI8UbMjkmgT!3%q&y z-uJ}s^WFK~J7>>sc2vbK?6jLQELleU zqovDnrp#b>-H=EJ`Qe2mk8eZ!G8%LWP3@jvkZGrh8sXf@5dZ*uxL+(B%+;J79Kjss z4$i=*9(JJr4&Iw#5x{&$lF)vOJnN*nQgzZT{HtF)SeF}F{ubS{+ z9DgU;PYol%gKGuRFBKt9p;?FdR)LydMFJVehI}l)Y1)(Ctg zGwPS5A{b7=_seMi-3V~r^j?(fVmC{x%PGHMd+S7Y36j9fB? z1h%UZ)fu~2?7c@Jg~CLdy5)h&per=dI(-$h5D6TPsuBO8C3%&Ea1K|3TZUt@isl@} zCa=>!<{x3mfa*KkV@2?w@aUJve#M2~{r3OFh+%k)`02cgbA2~`1(x^V2O);A6NKRA zS|yOYb)A^!wY{dC8Tv=G%FTD=>*16^Bm zTb51qATj$7S&|=Ev~w3&l?-@NuoH2a#-#JDk5~53ZCcDPxD|c?UKSn$Q77Y+YV5F9 z?KA_Rlmxerg=_8h9Usp3g$I;rSs(G4ANT4#l_CIv!$2lxB(WG*LPO3kNky?d7UE=Ax%&pLeAx9=IK7Tmwt8^yFD`9yDl=qM0AN=e z0C@PzRW8;*J0Qo;E7#u^*sh_S1B?&vcEOnTpY5?Pv5alVu94Ld;|Q;<7|70&6Il$f zrC&3oGhLnvYwRbc)t7vAoe>mwW}rTV^o7YDTCeUlT?gyAYxN7Pal?GL%h|tlxey(n z4&5GG-g=p}3HzKTrWIl<7ou1{RqY>IU`|LEfM9)0>F0kfBUmk6Bo`i-tt-qb(JIOD z0NmVRvdX})?snKLuzJZ8xg3*Gu*CP#a4jAEOE{w98q-Lwom1>s@=XT5uWe=`pAly< zbxb3j#j+4j=)R;*UFj`-kJn|ugKbE8@^emVn4r(L79hW|zTuLv9zIe-S3EDra@ zGa#{-j+ZA&f3tlJEJ7BV@xJC;qFXQ4En4eD<5AC?&^CGwLJck^*VG@(&;Gc#s0Qph zjAj9Y7XzSkS;htjxqJmn%DRNa3^tOq%)yXVb8&iY1>=1;8$3MwpEOXTsMhC*a_9;9< z5N#uNml4^uN)SFc_iV|dd#I8Auwfwcney6MpY3%^`!`>K5kT60jhTScoiuCOd{LhI z3#pod{HKxz3-S)zhaD=Jz`@fWBsf>M?(9(g_jl|8UAR|>eJ)q#OcVsokG7g`@x3J+ zt&_flLv^o6Vnm5)bEc+!du%`veno&lDd3P zBo1CKV~$#p&sf3O?3iH1F6kN1HZU~&oUyJmB({gE%tE#5lubi*F>-p1INSrgzj}0P z&bYOrqD1tb`DxYwtP1l;&xC@n2J1)E| znXhA~ZkT6siuE1HTot%+U)K4gMFzic}tLU z3su1jC$f9Rr;vc0I9cd|yJaCXvR4Aj-zXl$(D%O=g&_0F%Jk!}%2CV8 z@XGf7YW4EUGW}{X@yh1^YUS&(@W(b+5|XxMzb7i&!$jMtLvM0zsKg38S_NZfGLH^RN>m`P9kh9o^$IeVo14XGF`*skd# zh)22Y3|JYuV;M0ireX;!@>)gD-+E7Voozxy*xeowo!RlyBzhIV++mI=V%jksn_7<16i$n^6k}M)<6vGo8V0 z&%kZ}?6~oXxS`A3cexW&y%c++Vtn&Q&MkPgLAizu02~kj0Hpu7O95L0fi7TPmmeA-iOS&@BGd20T$5*ot`%e^+V zsb3W&@lxN_d?tIe_1)LnW7u)k^PbYUQVTZ|Rh#ZPpEZr6WkJ|p zuD(*fM#a0uyUHVti0RFY%(RFG;1+7Ng2Ki=h~3?$#hS2ih3$GMnmXA?ig#Fs`(fetB_2kpss;TD+_iFkNo{?D%e0X$f@0v~^yaiPPz#g+^QYH%}(7nvA={La4pbp4-7zsA}wtFcbMa z$|cN@%#ff68As0*S6)J?myG4ixrGE&-5rARu&F>XGyXIFZtV%?oc&z?%*|9ZRP;Ai z1iZ#nhLK8l>@@n!R?B{P^uMFDu~)#@w;VR4?lwuJs-P)OKD)0Ak5bNN^H9kd_-OdD zC>^z>1PVj(Ch>H6gY&U3KhVi=yDAl!Q;d{Pd6PkjW@#!ADiE-}pG2Z+6Y{qHb97Oc zdkm4O&BG=j$0Hvr>~ntB?~KUyxZT;z0Qz%m`iZUznx0C)8lkR=o#L-w`ARi7OEXAT zo`x`(Z1*~-8?_tNW!k)G;T>&CW9pixgIj1KF?YTc!1T1PrYE+cV^S3Z-V#X6^82MM_+Wlv_aP8^mD}XgsOoT z?58S=8$O3$Qd-cfglA?qpMBeShgD#dXZf~E$Dy;;vO7a!KPVz9VPeN(WIhs;WkRVD zVs!SF)7RFQ&Dibp4`&lea^8jR`ln{CMs)=aU$edHcrqRCX2F+x^nEis_-U|G;jkhaA0$!#PzBqp_-M?_F^&&TSQ}Ph`6i{&k6_1zMw&7g(cX~y~=KtS}G2f6dnuj_YJJkFgTb-dCeo3GwX6D^0APoeEdL?`b~LM z8>w)#WYJZLUkFFok9k7X`y5dSZO)010Dv#V008#?aCGqm0sl&FSJ%KHm6OnSzf9cM zAs}}k1*5Q5VfIWzS^x3wlz)RZto0$eY#0eA*dL z@#d#w*a3b^2x#NMd5ptS2iv}$z9{zDRawNgQNG0J4`iQ=2_c8zP1S5q%Lkx;=Fkl( zT(yXL$h;cAVgJer`Kvw){*H=>!ybXI1|EuwbmEIimCl!1!HY)`t4X8hJjfIH<~xab z%*e+0(iCQkIN>A2sg<^uX%C$<@`IMa4oY(@k+zbh+g`e()Zexz4Kbf$Jd+WQ(lkL= zR}&M$#T(T1RuC~waw^DWv57HUQ7FTD@a)-lJYVr_r7UpROSx2jnOY@f96llg#mK`a zR9n%Jk`af!Dy))(T59Osn;I$j0XIjhfF- z6|`V$c6_(n+QQ1WFIM|uJ@-5SJaOGFczHFmBTmKsp!1@j@=VxU$oS3cs)1SqE+~$v z_R{HmWH-ES=bu#nF!T>BS)@g1fuzNP|1N4UjbKveq9a*6Vl2Jzak_s=xQ&{2H0|&J zCtw3NZ0fnq3vUm;DFMUbIlCzOEoUA(p|CzGk(@A2UX~<0F5O0?u>D+!Ozptl|q?m zScN`r-Q0jnY**>ZC{alKnpuqqu~M5U_+r%{Y+X{)X@HCNm)Jse7ld2cMlrlO-Xt9Gqg{P5iVQLOo+5efbTJA3DHC;(WC?3k?p zYpT2Im)!mWh5Klx^R%Id7x?D z{`AEDNu{=Bi)P3)&(62D4S}c2@i$hHj0Z|k_h5uQ~hIijPeiOF;1m4$re@BtR zSIWOBzkkB-tEIogiQxL#zu^C)o&JfwuTK7sUV)?k&?^4~-xunB2NS_d2{`yKvfZC{ z?yFe8+Yv|kY3EN(>rVsstC!ymL}31l#lO`ue_FYp)PJ{95C1CyPUqiJ{GS%?r_A3i tq~ZUv@UH~=C-i<~_#JuxUt|9vPN*rOz=H??fB}E`!{=%Mo~8kS{{nk8Fk=7! literal 0 HcmV?d00001 diff --git a/spec/fixtures/sheets/single_data_programme.xlsx b/spec/fixtures/sheets/single_data_programme.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..468537f9fcaa2dfb4311a3177ddafe9b871c4465 GIT binary patch literal 6948 zcmaiZ1z1#F*Y?oe-N=A+haw?JcXvn(-RaOsgE$Obk|H2TcQ*_o-3)?+A|TQ&@Xh-? z-~ayN`#%4F=9)d{>~r1soVC`z)?VvAnkp!$qyPW_6Tk{7Q7y{W`v6B=i6H|3H~><> zb59#*FJ7L1jYt4O#Ett$MN*GrA0KYieh~IXz}xz8e)(kc!TctHR*VB@7XDanV7bZJ znP7MgnlhEMg3{gHAzeLu+)}#=wy5KLQe4*1Yd?Is@aXFDq`7nY+P5K~cUUoYv!~>P zGV3>a0;X+xMmSRVFd0ky*C`?lpv@+IQq9|KqDG$}oL|WWa5Nn{)3=vjM2vG*NN!)_ zAq_kRqI2gpw~R%Md@V`PSAlp$b}t_x>X(2#rp=XNe?s<~7SeJ0vhyV8rO-vZVdEV*4N)K< zp^uMY8owDei95tHODkl=B@4&=Y^4Tfg^quz}PsY4X7&I^r)^2JrrOe$8?Mj} z@hrA=(Zn^B+CABOMJKvf!)hD#)w*%%xtd1j1jZg$wDV|azRR1L#*KqBe=wWH^QyAR z%aLd=iljjoHVF)}4NYxo>PY#B2AYZDwmCnbRc+_nN*AASjmk?Qd*Elj#&`4qMAHV+ z+Yh>5pE2lDiepoeS_$3Y*+evIu4YRFFv;~h4;|HGOtfZAy?AYXL6^nVlG~8#x(W5Y zG55~>^mw-}O^c;}!!=+WQYuEKV^|%g=6r=A*<`G46)BC+n>-f!X;Df2bquc$@h$Ul zX0`ZRl@|ZgKjM${$%K9t;j#CKq(J?*$NtU>zx(a~kt0SCIpQzp)tnpq8Y{CWdSOMH z!cUMQ+8b4!mF%0I`dz!~C|Y3_VC413*9+!B^>_C@H1jgvC-g|^rR4K9FPszz9&`q@ z1cdo?7BXH0|tfeXz?M@VeHz&79?qiWa5nv|344*c2QZz6#JZ&!KS+ql^9{x$Od(*oNwg}cHZ69%nT5uU7ktYsO(&mwL4Fo{-MkqD|8 zua`9`V^>zuZV9@M&iuIPf@Bf;qt!wzlSzA9w*y=i80KrYiXb^Ic7Fes#o% zXWc1Ud)I2-p1sJB5OOVruP005h?F%V;|Wqwwuy!SQD6^`g0i&mZFY5;Wu>*9-)KTj zCX)E<#HrPCBKntk@+ijrB+aTeLUT)`b2TfmS;7Pg^NR)I1RYG?FrUC~twPT{^oCcd zW;GOZi1<9nGP2IA*XCxQYCG%OsTw|tpj!kU*W_#vsN-3bC75Jr#Bw67`7Zb6(0=bS zUTvP3AG##DG1wIZ5Tj1Sb-K9mdufcTIOFKX1bwyxhHUyAa^ED=2M7E6?~IFGD21Q< zUo6`1naGd#EC}W#bY$k(uZ1w5a~U6? zf345Z5g(tGq-WW5;vfg`<=~ubrnQcL&q58omR-ct^N{*5E>yCFFIu#TCB?{q_E=0L zIpgjZwvb3pV?caeQb(upP`ugu0D0b`_Z6x}v^AD1wzW)rs$RUf0*^B0u=i(K;GWD?XzKvZJeGbiQnSl%Guf*Hq9iEx%^gPaIsM{rvSHLY>kQ^ISxq5 z`dk$0%pWA^2DPQPD3p6CMdGtCP8sPI>N2@eN0_wn+l-uM$lQVRdaez#^|no4$Dr8; z*#k7@J?;BhFR5i#nm&c)J{7_l+u|+n^|D=0YdLg(HPm^xEcyt@QF}rW|HTmX!(xeP zN&3XKzQme|6zoOaGg6;#*gN7*!s_K$Mk8y^Cs)}qX?3gtZzQNYAayrLOmbMAB zN*W#rzh9x^Hw!|8Z&iODHA(~uDw6eTpD0|eBwJuK+THj&PO)ituU0EVWfH%MJ69R+ zv70qnX{YJ$8NFM((YAk*wP;}5!-W2oLbi;BNLEMZId&=UL4z%OJ&>CP9;bcCmr`}; zUKy1gtUz}4R6y)t2Hbs1_ds~x*wRDp;*S2}V*hSJVyI7a>*Eb8>p zxNLi@u7q)Ivgx~5&8B^EXPM^E(Iv8}Q)1&BLJ!NCm+lS`8PQ*k=ZFU^CJT0*YFqt7 zMMqEDYnjjsHN;ra<+~pWX)v_#T)bb%TE}d3=U?UWqMw-!|0;$P=Z`*;-<0js8Z#Xi z*rPq{nOJ@?!6RW*uHbI7&4J-)wkjGKc&hp8>O`Mavg8f5R-&F9%|SOyvy+}TX(H)a z$$+EF!lyMNcQd*Cb%R*2iqB$dA|nRv_pIPgq?bwfrh`No(gwPt%}cn zaP#uNqx`YF<$Y1`)ta+Uk1Xc5cPOnnD#Ic2upkbxxiB> zh7oPT2o;L)6!4*c&Cv?cWo_OMCxO-y+jD3H=c3Sv=@BzkXxQGstMl-6%#GU^E| z|7UKTNb^Q`aD73fvXxxqzOv>j-J6XoI~pi>~U3ik_BWcme+sJ&kSVe0`ggJ{_Q zx%|Q+>50mVupAtlK-;78Yrm_XcmHS3?HzU5EyLuY+a2=#4Tzjg&Cml4QLE7c0F?i? zy798NvGMle{p;r+ccGj`we&1SQuHrD!D!KP`hu?`3o}YZW6`_F$Z~`yYro2}V^oj+ zx@O0&erHZAcv6U{3OMqA$`LyUUHUxXa1+9m_|~L%q^(7^Km0D-9#GM5cNAWecR1)iuYZXZjp4h8vEsUZDMVWPHsIVd-s(p!aKlbzok}=k zia6RHV-Yj=b_6aL-RRhvp?B+*?KE78veQA&BuO#9#}dD={jjW;XEJd=;LFSZL3z0*MLxsZ7kNLBGh zIm^)e)L{c0{LFj@Z|c|Rq5P8-ww62pC!OY=fk3)dHqjN;p=_3+N7IL+NgI~~Ng|r> ze&9BdbHT%zot|ko;wa5U6*)5`eIQb*_?o%{` ziD9mKyt()Y49s3N>AMi@nD*b*HC{3ix68VS!b;Nc<}cVHkxlD}70FprYrLq>gRa#&1#nT|LO6Zh^13vvguEi@tr%RR(o z?JLAkKY|z<%MnAfy0onEXCLV%rSdlRnD(m5L0$-)tng{qvy--A$6=5*T_>4r(Gzc< z!{S8mM)L}l*`F-?^9&t#9TpTvXxfB{RZtM`>q1UWaFAq`C|#W9hj*b3Y^$b!uV1wnfnZ*VsUk-Yz02qRNENBs#7dk z!vA8R+uYF!Fwe`P%=-0Ih@DW8q9`5PuZjr71(k#WK$rr^RN<;p#f*Yb(HOj1fKk{7 zDO#v7?3=l#o?x!QXlU6>Z)tyYT|5DDMFtQ+0F~-xy!1mx9ViuywjbRnluKHl5g$ek zvKB8hzVVyg(G-UAC|2R?g~y{!VNiwQO3O1^L$P3_AantY8fkJy6)0H@N-g^041-?9 zVY?$pm*t(dK>IHf8zo*$xm78l?y<;-5lwh!c{@0xl}z6tPTU z4@(;ZT|84rb=m@+z3_d$J%eX1@l+S$J z3@xmtN-)4^ZOaGiJCqTc%FF%*wLG2hc1l+!IYp_l|V zQCAgOr;-8Rwv(?4=iax=WL-~hgDfYvkR&=MVersbt8GB|;>Bw&Gm{o3kG(!cU zt=4rYRv$A(t8|(ZO||FIlbd3qd8Yzun>Cr&&EgXjB}$Ln4Or>t)>34}ejW1+GSx*E z$)ixv24}FsQkpXCPSUEH)0F(oq{9l86zSuf3d7~9E)t25S2CRW zD!BcS(xA6A-hZNUV_mg+su%k*$lmk7Es|&QP%Jr3U%TN99?ZpXYOYSebyUk?uweV zt@XP%scQX>tXnBZkPpe&;!{^D;_>y<)))qjSjZ3LNByElg;G&p^lr9|te|dH>yC?U zux6Fouy*Y~z|HOsT8;;IKE2u@V6AATi0^=Wv8M%Zq@`u=kMw-uaOv1D^65pTPXYwz zFV8izG>6X86f_DXWLDmc71sUHGF}*gwop(2fC6Fw0Plap?(OGn^N;cVY&h%+=OgUc z;>4u)cd7@!Pd3J1LUj$m!d~EB0=j77bj!Uf)g7CUeg#sBid=9{e~e6wFDjU@??J?_ z;ijzIjY97C(Fo9?LV^5gP8`wKbi0jQ!m-*j%~ZWw9)J@qxLzt-_yI`R>zsy$=dp^} zx;Ck7KazMzC+g}EiTOR`!}iLNjooPbxlq7n<1S)@mZOsyF3g*O`^5r|>U_w8o6ls` zd7meO*J|9kXB<7ZI1bqpjk>*sBoRq67It+yt z)Ze*_yngHy0m*mCP&-yQ2#uk3H;AqhT|+M7>SxiHeHcYwxG)$v#3YMLWt!a)zV#HOOa7%ieYt2S?EV| zR{`PS_&UeOs>_0tHBvO=40O3rucW9fxj^`=HEXb*@Y`pev$pOghH!J$f|-hD%vtk_ zQ#{MhLfOQ$N~t_kk0;-4W)5y?dzQuX$?@C8e8=C3_kGAmBOWTiPvdQOATgqfp)Wy8 z>c)hZ8s4jCI(m^0rtz;8xl?qxU7h`wmnd?V;8c#=Ff*S4jg-G8a+z-{;Ty3(hgvkm z*2X*k#A)rda$VU_+kFH7RaWe)vvFb%3CNG%u#$9j&I4(1-E+U6iG#h+9t39jIH z%sIlw1w5_d?*8b&c{r~4(L=l=y~hhYOGwA2b6TawfZW#aJs@Tr+63$tQwv>sLimfE z&17AB4x?LCNvTQ( z5b==xkV7{`g0a8}3|{AnoSnFt)+-;LV7Pw%Qxp_%6%`5Q$I{#^CKdsk#&7j*7TAJZ z*(2$P%JodmaFt{3O5!q;T^BvF!;isb?7&d1%`64hAO1h;R;SZ9TCd!?6t1tXwyh`9 z-@>oOgO=OvPseEJON4Mg$BM0NNq^&?t`qE4mpD>Cwx0g6j&(nAE-%L&ryvq%GeUUz zm*`?5W<~0VD?*}_JHLrzgLCkEYw3|C$T_5QYj0y@fRiZK zqn$~5S6|R#mqAISs+Cfx@&fDz)djLd3gaC4(l|vS(c6p=Dcd*?qxg-y0fi%C4OW7; z1M1-{frR+p41=wi|=dx!e^wZAj2ZCDn5@q->5@~u6=oL90>JO4Ci*Y`%#halPrpAPgn z02jIK{m~n{Zrw(rm?GFX?&qU1=|`z9uAbFP0Gmcs_d;cOThmqVBbYn-m!vFq00x^B>`0ee)pTPTq{qHDB#7g-O`TkG%eaZWGI2l3& z{Wtu71@J%7_a)}v(HjW#AA<9r;QMOp?_e@SVSoVtgZBC-&wVNKcOEH3e~94uQ<(gd z;C?IlJ3$Z5zpeO>#_~^=`&G^FEP+ISvHVAk^C!do4f1yeFOt6*{=HrP3B8{geup-Y b1O8i{&{RP~BoP1r8}SK6#0o 'Date', 'B3' => Date.parse('2018-01-01')}, + {'A4' => 'Datetime 00:00:00', 'B4' => Time.parse('2018-01-01 00:00:00')}, + {'A5' => 'Datetime', 'B5' => Time.parse('2018-01-01 23:59:59')}] + end + + after(:all) do + @creek.close + end + + it 'parses dates successfully' do + rows = Array.new + row_count = 0 + @creek.sheets[0].rows.each do |row| + rows << row + row_count += 1 + end + + (2..5).each do |number| + expect(rows[number]).to eq(@expected_datetime_rows[number-2]) + end + end +end + describe 'Creek parsing a sample XLSX file' do before(:all) do @creek = Creek::Book.new 'spec/fixtures/sample.xlsx' @@ -63,15 +91,9 @@ row_count += 1 end - expect(rows[0]).to eq(@expected_rows[0]) - expect(rows[1]).to eq(@expected_rows[1]) - expect(rows[2]).to eq(@expected_rows[2]) - expect(rows[3]).to eq(@expected_rows[3]) - expect(rows[4]).to eq(@expected_rows[4]) - expect(rows[5]).to eq(@expected_rows[5]) - expect(rows[6]).to eq(@expected_rows[6]) - expect(rows[7]).to eq(@expected_rows[7]) - expect(rows[8]).to eq(@expected_rows[8]) + (0..8).each do |number| + expect(rows[number]).to eq(@expected_rows[number]) + end expect(row_count).to eq(9) end From 78dc0a8907c9a9c1b1fca7aea5b0878ad73e3ebe Mon Sep 17 00:00:00 2001 From: Baptiste Jublot Date: Thu, 29 Oct 2020 14:14:19 -0400 Subject: [PATCH 2/5] Ruby 2.7 updates --- lib/creek/styles/constants.rb | 2 +- lib/creek/styles/converter.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/creek/styles/constants.rb b/lib/creek/styles/constants.rb index 1b4f156..bf85510 100644 --- a/lib/creek/styles/constants.rb +++ b/lib/creek/styles/constants.rb @@ -37,7 +37,7 @@ module Constants 49 => :unsupported # @ } - DATE_SYSTEM_1900 = 25569 #Date.new(1899, 12, 30) + DATE_SYSTEM_1900 = Date.new(1899, 12, 30) DATE_SYSTEM_1904 = Date.new(1904, 1, 1) end end diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb index 459d3fc..5f5e912 100644 --- a/lib/creek/styles/converter.rb +++ b/lib/creek/styles/converter.rb @@ -88,7 +88,7 @@ def self.convert_datetime(value, options) def self.convert_bignum(value) if defined?(BigDecimal) - BigDecimal.new(value) + BigDecimal(value) else value.to_f end From 571b03d79b0f67167907120598b9cf699dbce448 Mon Sep 17 00:00:00 2001 From: Baptiste Jublot Date: Thu, 28 Jan 2021 00:33:35 -0500 Subject: [PATCH 3/5] Add option to include headers in rows (hash { header => value } instead of array of values) Fix datetime conversion --- README.md | 59 ++++++++++++++++--- creek.gemspec | 1 - lib/creek/book.rb | 9 +-- lib/creek/sheet.rb | 108 +++++++++++++++++++++++++--------- lib/creek/styles/constants.rb | 3 - lib/creek/styles/converter.rb | 23 ++------ spec/styles/converter_spec.rb | 2 +- 7 files changed, 136 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index c17f155..5a87c78 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,19 @@ creek = Creek::Book.new 'spec/fixtures/sample.xlsx' sheet = creek.sheets[0] sheet.rows.each do |row| - puts row # => {"A1"=>"Content 1", "B1"=>nil, C1"=>nil, "D1"=>"Content 3"} + puts row # => ["Content 1", nil, nil, "Content 3"] +end + +sheet.rows(headers: true).each do |row| + puts row # => { 'header1' => "Content 1", 'header2' => nil, 'header3' => nil, 'header4' => "Content 3" } end sheet.rows_with_meta_data.each do |row| - puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"1", "cells"=>{"A1"=>"Content 1", "B1"=>nil, C1"=>nil, "D1"=>"Content 3"}} + puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"1", "header_row" => false, "cells"=>["Content 1", nil, nil, "Content 3"]} +end + +sheet.rows_with_meta_data(headers: true).each do |row| + puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"1", "header_row" => false, "cells"=>{ 'header1' => "Content 1", 'header2' => nil, 'header3' => nil, 'header4' => "Content 3" }} end sheet.state # => 'visible' @@ -38,6 +46,46 @@ sheet.name # => 'Sheet1' sheet.rid # => 'rId2' ``` +## Headers +`rows` and `rows_with_meta_data` both accept the kwargs `headers` and `header_row_number` to load +the rows as hash with the headers as keys. Also, a `header_row` boolean is added to the row metadata. +See examples above. + +Headers (as an array) are loaded once by parsing the file a first time until the `header_row_number` is reached. +Rows are then returned normally as an Enumerator as usual (new Enumerator instance starting from the beginning of the file, that will include header row as well). It's the caller's responsibility to filter the header row as needed. + +`extract_headers` can also be called manually from the sheet instance. +Once extracted, the headers can be accessed through the `headers` attr_reader. +As headers are matched to their respective value in the row by index, it's possible to modifies the array in `headers` to customize the headers (to fix typo, make them unique, etc.). + +```ruby +creek = Creek::Book.new 'spec/fixtures/sample.xlsx' +sheet = creek.sheets[0] + +# Parse the file up to row 3 (file starts at row 1) +sheet.extract_headers(3) +# => ['Header1', 'Other Header', 'More header'] + +sheet.headers +# => ['Header1', 'Other Header', 'More header'] + +# Headers can be modified before parsing the file to customize them +sheet.headers[0] = 'A better Header' +sheet.headers +# => ['A better Header', 'Other Header', 'More header'] + +# Parse the rows as hashes, including the (modified) headers +sheet.rows(headers: true).each do |row| + puts row # => { 'A better Header' => "Content 1", 'Other Header' => nil, 'More header' => nil } +end + +# Or both can be done directly when accessing rows +sheet2 = creek.sheets[1] +sheet2.rows(headers: true, header_row_number: 3).each do |row| + puts row # => { 'Header1' => "Content 2", 'Other Header' => nil, 'More header' => nil } +end +``` + ## Filename considerations By default, Creek will ensure that the file extension is either *.xlsx or *.xlsm, but this check can be circumvented as needed: @@ -82,13 +130,6 @@ puts sheet.images_at('C1') # => nil Creek will most likely return nil for a cell with images if there is no other text cell in that row - you can use *images_at* method for retrieving images in that cell. -## Remote files - -```ruby -remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx' -Creek::Book.new remote_url, remote: true -``` - ## Contributing Contributions are welcomed. You can fork a repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests which cover your new changes and finally create a pull request. diff --git a/creek.gemspec b/creek.gemspec index 45cdc00..09dc319 100644 --- a/creek.gemspec +++ b/creek.gemspec @@ -28,5 +28,4 @@ Gem::Specification.new do |spec| spec.add_dependency 'nokogiri', '>= 1.7.0' spec.add_dependency 'rubyzip', '>= 1.0.0' - spec.add_dependency 'httparty', '~> 0.15.5' end diff --git a/lib/creek/book.rb b/lib/creek/book.rb index 8c51151..a6d8fda 100644 --- a/lib/creek/book.rb +++ b/lib/creek/book.rb @@ -1,7 +1,6 @@ require 'zip/filesystem' require 'nokogiri' require 'date' -require 'httparty' module Creek @@ -20,13 +19,7 @@ def initialize path, options = {} extension = File.extname(options[:original_filename] || path).downcase raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension) end - if options[:remote] - zipfile = Tempfile.new("file") - zipfile.binmode - zipfile.write(HTTParty.get(path).body) - zipfile.close - path = zipfile.path - end + @files = Zip::File.open(path) @shared_strings = SharedStrings.new(self) end diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index dc76f1e..82327ee 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -11,10 +11,17 @@ class Creek::Sheet :state, :visible, :rid, - :index - + :index, + :headers + + # An XLS file has only 256 columns, however, an XLSX or XLSM file can contain up to 16384 columns. + # This function creates a hash with all valid XLSX column names and associated indices. + # Note: load and memoize on demand + def self.column_indexes + @column_indexes ||= ('A'..'XFD').each_with_index.to_h.freeze + end - def initialize book, name, sheetid, state, visible, rid, sheetfile + def initialize(book, name, sheetid, state, visible, rid, sheetfile) @book = book @name = name @sheetid = sheetid @@ -23,13 +30,10 @@ def initialize book, name, sheetid, state, visible, rid, sheetfile @state = state @sheetfile = sheetfile @images_present = false + end - # An XLS file has only 256 columns, however, an XLSX or XLSM file can contain up to 16384 columns. - # This function creates a hash with all valid XLSX column names and associated indices. - @excel_col_names = Hash.new - ('A'..'XFD').each_with_index do |col_name, index| - @excel_col_names[col_name] = index - end + def column_indexes + self.class.column_indexes end ## @@ -56,15 +60,30 @@ def images_at(cell) ## # Provides an Enumerator that returns a hash representing each row. # The key of the hash is the Cell id and the value is the value of the cell. - def rows - rows_generator + def rows(headers: false, header_row_number: 1, metadata: false) + extract_headers(header_row_number) if headers + + rows_generator(include_headers: headers, include_meta_data: metadata) end ## # Provides an Enumerator that returns a hash representing each row. # The hash contains meta data of the row and a 'cells' embended hash which contains the cell contents. - def rows_with_meta_data - rows_generator true + def rows_with_meta_data(headers: false, header_row_number: 1) + rows(headers: headers, header_row_number: header_row_number, metadata: true) + end + + # Parses the file until the header row is reached. + # Returns the headers as an array. + def extract_headers(row_number = 1) + return @headers if defined?(@headers) + + # Extracted row numbers are String, convert it here to facilite comparison + @header_row_number = row_number.to_s + + rows_with_meta_data.each do |row| + return (@headers = row['cells'].any? && row['cells']) if @header_row_number == row['r'] + end end private @@ -79,52 +98,65 @@ def rows_with_meta_data TEXT = 't'.freeze ## - # Returns a hash per row that includes the cell ids and values. - # Empty cells will be also included in the hash with a nil value. - def rows_generator include_meta_data=false - path = if @sheetfile.start_with? "/xl/" or @sheetfile.start_with? "xl/" then @sheetfile else "xl/#{@sheetfile}" end + # Returns an array or hash (with headers as key) per row that includes the cell ids and values. + # Empty cells will be also included with a nil value. + def rows_generator(include_meta_data: false, include_headers: false) + path = + if @sheetfile.start_with?("/xl/") || @sheetfile.start_with?("xl/") + @sheetfile + else + "xl/#{@sheetfile}" + end + if @book.files.file.exist?(path) # SAX parsing, Each element in the stream comes through as two events: # one to open the element and one to close it. opener = Nokogiri::XML::Reader::TYPE_ELEMENT closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT + Enumerator.new do |y| - row, cells, cell = nil, {}, nil + row, cells, cell = nil, [], nil row_number = nil cell_type = nil cell_style_idx = nil + @book.files.file.open(path) do |xml| Nokogiri::XML::Reader.from_io(xml).each do |node| node_name = node.name - next unless node_name == CELL || node_name == ROW || node_name == VALUE || node_name == TEXT + next if node.node_type != opener && node_name != ROW + if node_name == ROW case node.node_type - when opener then + when opener row = node.attributes row_number = row[ROW_NUMBER] - if spans = row['spans'] + + if (spans = row['spans']) spans = spans.split(":").last.to_i - 1 else spans = 0 end + cells = Array.new(spans) - row['cells'] = cells - y << (include_meta_data ? row : cells) if node.self_closing? + + if node.self_closing? + y << to_formatted_row(row, cells, include_meta_data, include_headers) + end when closer - y << (include_meta_data ? row : cells) + y << to_formatted_row(row, cells, include_meta_data, include_headers) end - elsif (node_name == CELL) && node.node_type == opener + elsif node_name == CELL attributes = node.attributes cell_type = attributes[CELL_TYPE] cell_style_idx = attributes[STYLE_INDEX] cell = attributes[CELL_REF] - elsif node_name == VALUE && node.node_type == opener + elsif node_name == VALUE if cell - cells[@excel_col_names[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx) + cells[column_indexes[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx) end - elsif node_name == TEXT && node.node_type == opener + elsif node_name == TEXT if cell - cells[@excel_col_names[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx) + cells[column_indexes[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx) end end end @@ -133,6 +165,24 @@ def rows_generator include_meta_data=false end end + def to_formatted_row(row, cells, include_meta_data, include_headers) + if include_headers + row['header_row'] = row[ROW_NUMBER] == @header_row_number + cells = cells_with_headers(cells) if @headers + end + + if include_meta_data + row['cells'] = cells + row + else + cells + end + end + + def cells_with_headers(cells) + cells.empty? ? {} : @headers.zip(cells).to_h + end + def convert(value, type, style_idx) style = @book.style_types[style_idx.to_i] Creek::Styles::Converter.call(value, type, style, converter_options) diff --git a/lib/creek/styles/constants.rb b/lib/creek/styles/constants.rb index bf85510..4cde485 100644 --- a/lib/creek/styles/constants.rb +++ b/lib/creek/styles/constants.rb @@ -36,9 +36,6 @@ module Constants 48 => :bignum, # ##0.0E+0 49 => :unsupported # @ } - - DATE_SYSTEM_1900 = Date.new(1899, 12, 30) - DATE_SYSTEM_1904 = Date.new(1904, 1, 1) end end end diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb index 5f5e912..aaefba4 100644 --- a/lib/creek/styles/converter.rb +++ b/lib/creek/styles/converter.rb @@ -74,16 +74,11 @@ def self.call(value, type, style, options = {}) end def self.convert_date(value, options) - date = base_date(options) + value.to_i - yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-') - - ::Date.new(yyyy.to_i, mm.to_i, dd.to_i) + base_date(options) + value.to_i end def self.convert_datetime(value, options) - date = base_date(options) + value.to_f.round(6) - - round_datetime(date.strftime('%Y-%m-%d %H:%M:%S.%N')) + base_date(options).to_datetime + value.to_f.round(6) end def self.convert_bignum(value) @@ -94,17 +89,9 @@ def self.convert_bignum(value) end end - private - - def self.base_date(options) - options.fetch(:base_date, Date.new(1899, 12, 30)) - end - - def self.round_datetime(datetime_string) - /(?\d+)-(?\d+)-(?
\d+) (?\d+):(?\d+):(?\d+.\d+)/ =~ datetime_string - - ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) - end + def self.base_date(options) + options.fetch(:base_date, Date.new(1899, 12, 30)) + end end end end diff --git a/spec/styles/converter_spec.rb b/spec/styles/converter_spec.rb index a86e75c..c9628bc 100644 --- a/spec/styles/converter_spec.rb +++ b/spec/styles/converter_spec.rb @@ -16,7 +16,7 @@ def convert(value, type, style) describe :date_time do it "works" do - expect(convert('41275', 'n', :date_time)).to eq(Time.new(2013,01,01)) + expect(convert('41275', 'n', :date_time)).to eq(DateTime.new(2013,01,01)) end end end From 63e1e87176b3f781b7c707806db9d1b8de78bd92 Mon Sep 17 00:00:00 2001 From: Baptiste Jublot Date: Wed, 3 Feb 2021 20:08:58 -0500 Subject: [PATCH 4/5] Typo in comment Co-authored-by: Jean-Philippe Bougie --- lib/creek/sheet.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index 82327ee..5845839 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -78,7 +78,7 @@ def rows_with_meta_data(headers: false, header_row_number: 1) def extract_headers(row_number = 1) return @headers if defined?(@headers) - # Extracted row numbers are String, convert it here to facilite comparison + # Extracted row numbers are String, convert it here to facilitate comparison @header_row_number = row_number.to_s rows_with_meta_data.each do |row| From 2df093511ebdc7c4329e79a8bf98045030aab628 Mon Sep 17 00:00:00 2001 From: Baptiste Jublot Date: Wed, 3 Feb 2021 20:18:57 -0500 Subject: [PATCH 5/5] Return nil for empty header row and comment about it --- README.md | 4 +++- lib/creek/sheet.rb | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5a87c78..5faf9c0 100644 --- a/README.md +++ b/README.md @@ -51,13 +51,15 @@ sheet.rid # => 'rId2' the rows as hash with the headers as keys. Also, a `header_row` boolean is added to the row metadata. See examples above. -Headers (as an array) are loaded once by parsing the file a first time until the `header_row_number` is reached. +Headers (as an array or nil if empty) are loaded once by parsing the file a first time until the `header_row_number` is reached. Rows are then returned normally as an Enumerator as usual (new Enumerator instance starting from the beginning of the file, that will include header row as well). It's the caller's responsibility to filter the header row as needed. `extract_headers` can also be called manually from the sheet instance. Once extracted, the headers can be accessed through the `headers` attr_reader. As headers are matched to their respective value in the row by index, it's possible to modifies the array in `headers` to customize the headers (to fix typo, make them unique, etc.). +Empty header row returns `nil` and the rows are then returned as an array (same as calling `rows(headers: false)`). + ```ruby creek = Creek::Book.new 'spec/fixtures/sample.xlsx' sheet = creek.sheets[0] diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index 5845839..5fdb503 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -74,7 +74,7 @@ def rows_with_meta_data(headers: false, header_row_number: 1) end # Parses the file until the header row is reached. - # Returns the headers as an array. + # Returns the headers as an array, or nil if the headers are empty. def extract_headers(row_number = 1) return @headers if defined?(@headers) @@ -82,7 +82,10 @@ def extract_headers(row_number = 1) @header_row_number = row_number.to_s rows_with_meta_data.each do |row| - return (@headers = row['cells'].any? && row['cells']) if @header_row_number == row['r'] + if @header_row_number == row['r'] + @headers = row['cells'] if row['cells'].any? + return @headers + end end end