From eb6ead7303797af3b6abaf0bf55d377098f5d848 Mon Sep 17 00:00:00 2001 From: Eneko Martin-Martinez Date: Mon, 1 Jul 2024 14:55:15 +0200 Subject: [PATCH] Add support for CSV TAB externals --- pysd/py_backend/external.py | 72 +++++++++++------- tests/data/not_implemented_file.ods | Bin 11212 -> 0 bytes .../pytest_types/external/pytest_external.py | 28 ------- 3 files changed, 44 insertions(+), 56 deletions(-) delete mode 100644 tests/data/not_implemented_file.ods diff --git a/pysd/py_backend/external.py b/pysd/py_backend/external.py index 35e4ffae..df18110a 100644 --- a/pysd/py_backend/external.py +++ b/pysd/py_backend/external.py @@ -29,11 +29,23 @@ def read(cls, file_name, sheet_name): if file_name.joinpath(sheet_name) in cls._Excels: return cls._Excels[file_name.joinpath(sheet_name)] else: + # get the function to read the data based on its extension + read_kwargs = {} + ext = file_name.suffix.lower() + if ext in ['.xls', '.xlsx', '.xlsm', 'xlsb', 'odf', 'ods', 'odt']: + read_func = pd.read_excel + read_kwargs['sheet_name'] = sheet_name + elif ext == '.csv': + read_func = pd.read_csv + else: + read_func = pd.read_table + # read the data excel = np.array([ pd.to_numeric(ex, errors='coerce') for ex in - pd.read_excel(file_name, sheet_name, header=None).values + read_func(file_name, header=None, **read_kwargs).values ]) + # save data for future retrievals cls._Excels[file_name.joinpath(sheet_name)] = excel return excel @@ -109,34 +121,27 @@ def _get_data_from_file(self, rows, cols): depending on the shape of the requested data """ - ext = self.file.suffix.lower() - if ext in ['.xls', '.xlsx', '.xlsm']: - # read data - data = Excels.read( - self.file, - self.sheet)[rows[0]:rows[1], cols[0]:cols[1]].copy() + # read data + data = Excels.read( + self.file, + self.sheet)[rows[0]:rows[1], cols[0]:cols[1]].copy() - shape = data.shape + shape = data.shape - # empty cells - if shape[0] == 0 or shape[1] == 0: - raise ValueError( - self.py_name + "\n" - "The cells are empty.\n" - + self._file_sheet - ) - - # if it is a single row remove its dimension - if shape[1] == 1: - data = data[:, 0] - if shape[0] == 1: - data = data[0] - return data + # empty cells + if shape[0] == 0 or shape[1] == 0: + raise ValueError( + self.py_name + "\n" + "The cells are empty.\n" + + self._file_sheet + ) - raise NotImplementedError( - self.py_name + "\n" - f"The files with extension {ext} are not implemented" - ) + # if it is a single row remove its dimension + if shape[1] == 1: + data = data[:, 0] + if shape[0] == 1: + data = data[0] + return data def _get_data_from_file_opyxl(self, cellname): """ @@ -1054,8 +1059,19 @@ def get_subscripts_cell(self, row_first, col_first, lastcell): if col_last is not None: read_kwargs['usecols'] = np.arange(col_first, col_last+1) - data = pd.read_excel( - self.file, self.sheet, + # get the function to read the data based on its extension + ext = self.file.suffix.lower() + if ext in ['.xls', '.xlsx', '.xlsm', 'xlsb', 'odf', 'ods', 'odt']: + read_func = pd.read_excel + read_kwargs['sheet_name'] = self.sheet + elif ext == '.csv': + read_func = pd.read_csv + else: + read_func = pd.read_table + + # read the data + data = read_func( + self.file, skiprows=row_first-1, dtype=object, **read_kwargs diff --git a/tests/data/not_implemented_file.ods b/tests/data/not_implemented_file.ods deleted file mode 100644 index a56eb466e25e6b7b11f1826494c6bc12c977a24d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11212 zcmcI~WmKHm(ry!+5Fj`NOG0po;O?%$T^k8b1HlOv+}$05dk1LT-6c3Q?ry<8IA_ht z%=spB@BMMB*Y4hLueYA@Ry03OT3UjPeJ3!t5o zHBism+T7GY&(74!lF`A^kikk1WC~)ivIbfjS{c||04?npK-RWEJwuQQ5NId&7t9Bk z{|M1TP0-5H$kf>0_FrhA_l$Z520(M*!=sfg$ z-zeKzS(#hwSpv=fLGSOnYi(;~YzqW|^!5I!yMNar$WG7B{{Ntd|C^rvb8CO6Wn^xp zX9xU$?W47owY~MjdfNZPto&VTk1D_Vh=hdnukG|OnEwdsL(M|Z($okDvSY9{G#ZE+ zu=IY5;d8*{3mp8x#|RH^OVwNZDXp3Lt13L>aU$kIV`!AJe0ITh+W1O|$cM$ys-!(V zqVAxIZ!L;U2GnnQ#Z0QVXgMGZwK=mKGe5wzq>02wpS*L@XzD&&(y;5q`qHzanh#ZG zQaL7`aLLxcYMEfO5ku{)3PnOD2@x5n!RT3?g zlSD+Sx-c1qhXDY*VF7@D@BhD>zJHxYpq(CrqlI~hLZ9XATa5Y(S~wP!z|5i=+?UjR zp19oPCY~I8RtVCTiY!c~a*C^WZf~}q&mLy5?(O#P_Y2UG$RH9oc$!5szsa=mdiAzd z0=SLIZPf&Q=KPHZ`+Q9pS$8z>9MHJ}>qv6;aja}cJYVv3_WJef?47zAjgH9)a_LB% z{D9Te6;}9pQ6_2xn*ep=__dHv@ym!N@p%0tE&a4VIOE_o4BSS0>lbE3PPiyDI2CeK z9lIvCnj^iweAROoG?D7*ubyOBZRv>r92yZ(uKgq9=OWVBo`C<>_3jdN!TJuGTHz~l zT(p`4f|@20Mbk)%_>WRi^bN*^&m>LxEe}PC-}lm%*len)xV=04ltsB;|2}Fr@uOG^ z6RUln*|+AUjWl?6zprH)K^p2mt~bmnvH^t0d*yAK%-yhVvea1jM$ zcuh}g&vpwu9@je*epRmSVZE#ValKI zLaw42%8w0D1Z8T4*{ap2C@n;V_odRyQ4d8~Z@d@Px4b=GE{<83D?`8DO zk|ZiJRErFwEf*ebE!{+^1uP{KHI3OqHbM5fI4SlWb89=_b6>bF1q-{8Tvyl6kihAx zBWx~vbR7Pouu$A(Eb^4S7IU2dmfZDC zJ`tg8!3{eJ#)l0I6)D`DTz%*IA^rnLUPeRA6^-9!ql!BJ>0of%*B{l=B6`inB!{=D zqpBKGh_`BWw$Aazi6qwP%L0^QMy#XjY9Lu{P> zbHUUrz6v4nQP$b%`>*jHC~apXe9_}lr?35OjuirASRCTD^ZcLN$LFNMV5LCDQAS7- zuE`r*YIfZ^UiuRX(~b}eRLR1@Vxw4eWH>+Yef1T}i3Zst2^0=95 zazQ73Axww0FU3*Wv!Mi`BT=B&-1HPqm*|bI#8k|F+2+YVEZbwi_eGYEi&Y=|giqi( z2yVLL8v!2C3kC@)|4S5IEzL|S(cmee2^)dIufvCjOn`_c0MCwZgo(8(G|MO8;##n& zzf~6*L8w|8!-u*udjFbhPhN8@H0(lYq23F5NwSg=H?O;U;~SUwApbM#9C-3$OeKGjhUlZSBy2 zbHY0GJ%pxU4ky4p;_4dnBuaEeP+T`(2MO9Q`5Lche)R(q?%}N;soT#n3nLGVF3;9Z zs!PAxHx=Hjq>>SvtZPyc0N8$%xzDM_F8m?3W+w-&QLH1qpl%MTje9sVS`n4DgnGfp8E7i~|o@zs$HwuLFmsoy9 zU^}*t08MlFyG&R`w0w)>-<%k8#wGjqT@Dtuq&@eE&VlAqNIU)oIx?cLo;OjHW+=OM zcsA2Y6$}P8Jyjp_S?G9d@;E$F2-~>fAiO61$?!~N|1(s~ZmibwXz)e)sfZV<%=24Z zX+i^QHZJvoy(KzMR$FQH@TTpSEji3;#wF3Y-7RN(1&0l#p^@@wQJW8RIILzXuC>Jq z#O2A_I@7S66A8-wt<+$ffIr6D5jaeqPN>2SNZpfylAw8Ci;g&+Ex5I=h0N z4$7vsPx%lL;m1|`aD)&F;vkFpM9)!G9To6LaCoNasjD`8XcMqF%21RmDsNkM?ny;oHTu!1~JERIr#)Oo*r`c1+7S; z11j;*?{rv{#&Q&?(}L1px18bvM5W4JK;1Zi(ERnq>b~(FaigW4{r884q6Cf=?_pVa z<_Jy0aKkRz3)Jel64jW5(L6G}U&HbWLg8-=kkR3gxhgk612br72En;>x}_NPIsHTr z3RsBcazZ4w!RJK=-+{ysgV>FR!#ADJ@A>3+Vv{RLj`oH>cns;rpF72~4G$nvc83)( zvSas&5wvreDnZqshT^~t_|hJmE^ib54!VlCkZll?RGZ4R5}%A3z; z1Oa}m*LB%f)u$q8h1BCj?7;IkLHRG|QLjre4y}|mbXBdzzDZ3c> z7YV#usB+blP|J{_X~ne^{7NXu4u~myAg#f#`T_kDI2`0IjteHTTGHpVH8WI^-}OA@Ysw6>GjO zzYVN#lw6X)W%J~va78K7J!0p+NPUcIQhUvcKmGw`V645pQk3ikLEwcoe}J+(5fXOH zW!;ze2}DHEO`hH^$bMO`$wCz%+xuAI_fIe_)Wu`@*{M73WRNKQhLE--wP{}-wGSzW z7pZ96@3Kjat(X&>mnipz@?)CU=*gv9K%b%nz+Sc1El0FXmQBc&S%AlWJOMLr&t|o< znEP5dn*7i_8qeQVle1==(ZSKjucHnGo`ISEzc=;YTrb`s!=FvupR*5+Y~=UjrP zU%F*S+jD;=KTu37ggMzSzRF!Q%hS{smMIJ_gl;UC3C|*x_Z?VZdh@Kod@zRG9EWgD zte4LdTq{-KG)1tfV5s7G_pD*`c#{h7xlZEog_T87G>C~~#R%^~ae z5MV`;R|`>*<=ZL;^?Xl9pa$SmzEBZkIX(CrG6)G^7cS+TmVXkO>10TVCf z8T1!ktqfrjOw4y|bZt98_DE{XwaB>yUg0r5laDW4ljTa7lyqicX51Y6PIk&5zd6RP zL3HcXcoC%TziFl^inSut5Tt)QJVvbOQlZJ-Xw{GhJa&s=x4wExi~~cHl(t(dH;kP z1np<*#mg{f7>gL==&et;`lsu7lzVtaZXLRl_oOe4?hzjy@)>JQKeeX-fV}=cUBDh* z02o+V+C9cP9zAkR3#(Z+%scm{)$CA@mTY^a(#lcxT^JjYm&$zR6mNSDc;i7^b0XxL$10VbVkHmk`PT9)z!tb z`;#^9x~+TH`>?6GlX%HIA4pKR2udy2$3rXhqSx#p@G*zTv|~L2LJFa8qQ;Q?6lG#K z*WQJ^pWVtG7{8RB(W36!F5y+7N#z){t*zG#Wvo3+Vf0{A|B9_eO*wWB|B^rD+~h4j zXQNt@8rnruSHr;=9npyj>2VuS430>fuo=qaUXJnjw~Ozr-{AD6pxt^n(*RI_St zdpnlDJ<74?W)@pG+lhxI9imU1i@)$wp4CJfNCCniCLN0sHWMEnK}in+y8ZIcO; z^dZXJ@Z-+)>a`}sAAK!Csx3zxaLP7^j0jCkbGL0YC}|1E!dU50F3cgzBtJrqJM2L5 zM*?sNU!1`0v+5d|KYdX&_f)qZ*~B?^`NO zNh%V~IkYZth)`D|U4F(OPni%v&%ySZsDJr%jxCMP#NG9$YIeh3urdLIR*U|QVuDFr zcYj;wo>&*Y+xQikto2NT`CEMU0LO;0h^mLFEh?3|&fni#H*R(~lf~ zIo9xtQY`1Sfriz}R3FIQiNSj?+NEmw3Pe$dnO#2fT|beEX;|E>xIjd=8AmG4yuch@ zKpyl@@L%Bk>Hm7yTkmzVy2Xol*^eWK>}(z@fs~=7?pM_pR~gUsm5y{blPL!C_8c>G zN3cN=Ta@SsCzVXNap)jpYKJJei~=NFz40Nspb)CB(*7tNU&u((2H`q9G6{S!{Lrd* zinORk(y-)#S` zKwmaKVY4^5<0Ri(&uS_=F~B@Da=~tam_dunO=;D;v+dXCo=?y5re?)~;l3~wvy6K0 z(GC>?8TYiTBIWe6!m&2lF~kPWJ$anbPVYZC?!Ws2sS4<3>i*yy?8_sQh)#X;dAu!% zPbmOR#NJf`EgT84-|rbURQ{#f=2&NLh!J+OBT{&e5N$gg-kQw|!A3c26aXIynxR_u zY((PI#={LN?l%0X5Q%N-l1==@oCb7gGn2$Q9upd3IZYK%Mkv03kkmFbpk$aOSx{KW zmtW2+&i=bp7S&EEK-@)CE@g?TO%6NJULuFNQ@lw7`}E7e6;*m+-N3x#(Au**kkbdR zf?krMV)XBV6|rx3PmnNY==da}RT$%_F@h3fn59G`8GU%v9L;0Imlb7f?gpe}G*pjs zn#;vA=jc#I)1s>S;4@wT;Owy`_yX=k(uEsy$8;6s{F@aiHlBozh}o>ZQwqrHVqye3 zynHQ0S)DB~lyk2m)={5lC)@q{1uo$FZJ-lyLt`jhyQiB-x*ty>AbT!}1*!Z{&F9JX z7P0t&h<$=AwR#RGh&bi zh7{lFsb#Q8%m$gt<7xZCz9eFISy$Ek^0Z4^ty@_gze~;&i^e`z^3~htaeCPuRv{`1 zf_Y>Eab)=gLgJCx)-%M~KpJxKdGQJcdsv+Qms&CioLWY+Vjc8~jm$h9UuB1BIfyQ3 zH(TZz!lzWePT(AXt#Do=>KpNf0qw?$L$Yj9vh0kFyT#f~9q~qKQF7C#uB(uklcd|^ zgWmW(@+{j((zBg)J{l_R+n?`@E7VWlcgh<&#MiAROLpzBU;o^NuLLByxPXRn3exK1 z`X>CmrzBW-iMR~?(21Wdy^Fngd)2aLy^syJ*N+t&)5|k*2~II|3#}dtsyCq(CPwS?>jLuQ>SqdwidV5=K z^Tt}o2q)9f`W=SwpPM?<^}80`eW^5qAErB5cl^+j5B%pt7dwcqgB{o$ML9Ye0Btf* z3^xiZ#Gcu;j-8LIRmX70QVXO%`Q`mK-Nr@1#>c7JN>kCM+@nfy1vlx(r1%M3xsf13UJrlEI*PyGO&6)QLH{P zfi?{EMW8l+%fX}LTz@+dnbTwx&P?HPKz6NodtH9v0sH8isC4r>7(B#&zbOBcZ}J#h z2LbKuOf8N79WGSY`V3{ka62gNKH^+{trH~MO44d6p^5Zy`dt%(1|{1IJf(O^EKU88iLYNS2_E%c4)}F>I~3D_@#3|q7u`=Su!QfRxrN~ zAVFUj$T~qW%u21ITsq4Ev2=fWqBZ2>*c|?Lf@;?bK&pyjjT5qoBC>- zw^zc~`YM-SWxWvANQzqelDxh-=0vh5GV50cwPDP=uruahCL$;^QgjU3$P{&^v-9Q& zA+Dwoxp4#iz1;arkY+styye8O4WbpdLJmdsnhWXI%~pAX!WWeU@2+7|zQ~`wa1Ph; zdV>BXnH1N^n#y;yy#orljllJ%Ip;B8oe;B}uKOapklMHyKj`^zYwduI_7l_KX* z>g9OMe|_CuP-pBqjH>U}g+=9NWf%5D8(QUn;NP>n&a98%GSecaZeYwYfc;*V;ZE|@ zpK}x?dbXqA7$g3Yac7bD-5sT3Zu)@vPkj3?sKPI`IO2>`20MVy`aY3S3BIb{$QYu; zf9B~(){k!!3y?4ifjM=#!k`ghXM5rCRf;2g=&%}J9G`dm3^vFNsW^<(i7q9_ZpI*L zUeYg0HxTO;&Z@YQpC`5|uYxm@aepgZHie3*)yTf4k|Ko5)yBJ=vL0)Yt1h!*{5G*U zD5X>)1-r2+cXWDFUx+MEr&sN#feOd$$^sMyb9_JV=a#Qrc)au?sQ!K=%@fyySGR1^ z9_`t9N{JI{x2FC#M(XADfnYm)G~IR2%7HAn1L+scfpP;hHsS3Ciu9@iN?cHCS!E4`Xakf)@!B8H?2a z72=N@`^lGnv^XMS^#PNBac3*ti0h127(L5wf0{QVsrkK%fwtW!xqZWFHUiUO<0qCl zyHR)YuFh57b{6kC$pJQ$!;H~wgkD^#nEaX+)FoqZ-HwzK?nX?RH&^ctnc({t2^3vd z^nPNHQ7kL`bn1sCeH%@Y{lihgh0z1PB?`rf$X3NSd#OW|`0J=rC^9F!*2RnGDK1xf z*TPX*J!Y%X8AEnmvF0s0)+}Umz9>%=XB?TEu9^$f&bQPK9x{W7NJLmdrrK}-Kt07D zcIl54vaxLOGeblG;ITYr3KdQ4E%Yt*OwB=zc7IeEtSyZLWjUrkF|Rs$S2g*)5pmt$UQjzi;JPByGf{rg}1A7 zl!vw77nk^8P)fABmzTG%e^7{@caXn#aB#3^Sb|SfN^n$SL`YDQ&zHnd@1*driIKsn z37%2WF;TIpY4K6X$tgkcnW3rqG09mGX+^2oS+Q9asrh9Y!Ctu$zUi@{-y*}ZQsQ#c z;tOLV3X>zs6C<-TQVTK@!RaxT=_!@p5^J(z>+_Ryb22mY^K(k8vr6jnOTY!?;KGW! z%F@F6@~q0Ls*36ca7}Gpby-~vxDEoT2Denzx79VZ6?cq6S~?rrdMbLy>N|%Ud&X)< zry9nlTPm|UYYPUzr9F`1wmNWYL+wy)S!Xk3sI{`Et#P=sW~{q*rtSMef6Y>R)ABf^ zwWGhktEIQUzoT=wvwy5_Xsly$wr_Z1XncBdsAFcNeQvyObZT~Dc5!-gba8fSZho$J zX1RM|f8rtkvvknEdN{eVu>f6PT-%%5+#X&(p4>WHSU&x+b2fKyxo~v8cyzTi-UglM z+Zr8$&Wx@Uu3EDxiT$n^>};jcz^r!c<?Lm1QquTvu9`mMx!u`Bl z7f~~>*1HhqvPvs$TK(=9k5#k_vl&A~KEZZ`X;U4Ag{BdZ-vy!Xs*$xb28d)*K50)& z6-(y+*53$mleE5cp;SJ`LZ_}IMMp<4-FtwzzdwT9RADY|dK9$l99<+F-xu@_ZZdK; zTIlJB;k(INurPh<@%S7A*S;}I3TkVYs6H7M?My)QE8 z#cL?26d>TXDLLC6QoJ;qnOEYa&QeJ{5%oPh4k&PugNDrhP zzNYS=O(9`jEwvgin5*JD^5E>a@U58Gkv;bn6dB}YHdP1n9CBHyEFYnBTaAvit~ALU z*ON}OEh9!PP7D{f-#dvAHXQJ4xHBXmpFO=cvJIRJlzEqW>*}Ux`hDE3soi2@k^iQDW`SFmiE13Ks+i}w2ob!g|<-A7Pc!A|B@k%7#^qbH)jT~5z`U`hI zdXG!5NfjKo8!pq!OLp*0;|I*4Brq+Q5&1;xiuNhbL1~aHX<5a@EYxx)$5pd@ivFFu zFoda5UF@C&eF_=4s&fx`Sljo4Ci2V(tNRw>hXR2L2SL6NzF+k4CTO`0y;#^Pio5DUeHnOo8Ysk zB%caqBFk^pmJ=~M^#%#4!FJibWe;1=YMepDr+765c=cXy_X2ONPsHMib^)Wc4>BLp z8WLGOdBHZX8WM?naANM*GQeZfd+(GH$)g!l@KJr_Sb7a?#!IP{?~s<0ty1NYi!kl9 zgc!}6tb7N%TJd0&XD_&3=j4Z5m!EczuLt>#TuY>tl&z^$O(Kmoa!<-q;t+doVZvabX#u5&>PW-_uEO9^R4`R^+D_ml0w7 z4=v{*u2~!)~A)xREp52=5+sh z!HrNpo#LXBLPoJuu19*4qV%PB3K#O#jkIxF7Cc+R(jZ_)7Lkmg#O}AnVxbyJ)gZwl zuEofMv6 zzoJSB1+=$g#x#f;K!`-0)q^N;K8k2_G*_|9_O#agMFCB|Hk<%&g7PRw8uchFOhvf`H$^*p!EH*iT}|2Gt=pjzxGQ)ADsLLX6e7!Z2ztG z=XA{D?ckTV!~dBR_1}7b4wXD6Ykmp(^S`8S{#)_ScbUHqu#n)dag{$be|OjU)ubTF q?{7YTp!_jSf3_ZvcKMggzxj=omXm}>czo^o!w2|abmS=?pZ*7Z%J41# diff --git a/tests/pytest_types/external/pytest_external.py b/tests/pytest_types/external/pytest_external.py index e5296531..4f3aeaff 100644 --- a/tests/pytest_types/external/pytest_external.py +++ b/tests/pytest_types/external/pytest_external.py @@ -1968,34 +1968,6 @@ class TestWarningsErrors(): Test for the warnings and errors of External and its subclasses """ - def test_not_implemented_file(self, _root): - """ - Test for not implemented file - """ - import pysd - - file_name = "data/not_implemented_file.ods" - sheet = "Horizontal" - time_row_or_col = "4" - cell = "C5" - coords = {} - interp = None - py_name = "test_not_implemented_file" - - data = pysd.external.ExtData(file_name=file_name, - sheet=sheet, - time_row_or_col=time_row_or_col, - root=_root, - cell=cell, - coords=coords, - interp=interp, - final_coords=coords, - py_name=py_name) - - error_message = r"The files with extension .ods are not implemented" - with pytest.raises(NotImplementedError, match=error_message): - data.initialize() - def test_non_existent_file(self, _root): """ Test for non-existent file