From a4d214f86fb181dbc51276798dc14836dcca7d41 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Mon, 3 Oct 2016 18:58:31 +0530 Subject: [PATCH 01/15] Modified load/save methods to maitain compatibility in loading and saving LDA models across Pythong verions --- gensim/models/ldamodel.py | 52 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 7def8966b3..adafeaaf49 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -43,6 +43,7 @@ from scipy.special import polygamma from six.moves import xrange import six +import json # log(sum(exp(x))) that tries to avoid overflow try: @@ -979,7 +980,7 @@ def __getitem__(self, bow, eps=None): """ return self.get_document_topics(bow, eps) - def save(self, fname, ignore=['state', 'dispatcher'], *args, **kwargs): + def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, **kwargs): """ Save the model to file. @@ -1018,7 +1019,41 @@ def save(self, fname, ignore=['state', 'dispatcher'], *args, **kwargs): ignore = list(set(['state', 'dispatcher']) | set(ignore)) else: ignore = ['state', 'dispatcher'] - super(LdaModel, self).save(fname, *args, ignore=ignore, **kwargs) + + # make sure 'expElogbeta' and 'sstats' are ignored from the pickled object, even if + # someone sets the separately list themselves. + separately_explicit = ['expElogbeta', 'sstats'] + # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some + # array manually. + if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or len(self.alpha.shape) != 1: + separately_explicit.append('alpha') + if (isinstance(self.eta, six.string_types) and self.eta == 'auto') or len(self.eta.shape) != 1: + separately_explicit.append('eta') + # Merge separately_explicit with separately. + if separately is not None and separately: + if isinstance(separately, six.string_types): + separately = [separately] + separately = [e for e in separately if e] # make sure None and '' are not in the list + separately = list(set(separately_explicit) | set(separately)) + else: + separately = separately_explicit + + # id2word needs to saved separately. + # If id2word is not already in ignore, then saving it separately in json. + id2word = None + if self.id2word is not None and 'id2word' not in ignore: + id2word = dict((k,v) for k,v in self.id2word.iteritems()) + self.id2word = None # remove the dictionary from model + super(LdaModel, self).save(fname, ignore=ignore, separately = separately, *args, **kwargs) + self.id2word = id2word # restore the dictionary. + + # Save the dictionary separately in json. + id2word_fname = utils.smart_extension(fname, '.json') + try: + with utils.smart_open(id2word_fname, 'wb') as fout: + json.dump(id2word, fout) + except Exception as e: + logging.warning("failed to save id2words dictionary in %s: %s", id2word_fname, e) @classmethod def load(cls, fname, *args, **kwargs): @@ -1032,6 +1067,19 @@ def load(cls, fname, *args, **kwargs): """ kwargs['mmap'] = kwargs.get('mmap', None) result = super(LdaModel, cls).load(fname, *args, **kwargs) + # Load the separately stored id2word dictionary saved in json. + id2word_fname = utils.smart_extension(fname, '.json') + try: + with utils.smart_open(id2word_fname, 'r') as fin: + id2word = json.load(fin) + # id2word = {int(k):v for k, v in id2word.items()} + if id2word is not None: + result.id2word = utils.FakeDict(id2word) + else: + result.id2word = None + except Exception as e: + logging.warning("failed to load id2words from %s: %s", id2word_fname, e) + state_fname = utils.smart_extension(fname, '.state') try: result.state = super(LdaModel, cls).load(state_fname, *args, **kwargs) From 04a4634b4720379af7a416aaa5da15b0415a583c Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Tue, 4 Oct 2016 17:59:08 +0530 Subject: [PATCH 02/15] Added saved LDA models in Python 2.7 and 3.5 environments for testing compatibility --- gensim/test/ldamodel_python2_7.tst | Bin 0 -> 3298 bytes gensim/test/ldamodel_python2_7.tst.eta.npy | Bin 0 -> 96 bytes .../test/ldamodel_python2_7.tst.expElogbeta.npy | Bin 0 -> 272 bytes gensim/test/ldamodel_python2_7.tst.json | 1 + gensim/test/ldamodel_python2_7.tst.state | Bin 0 -> 510 bytes gensim/test/ldamodel_python3_5.tst | Bin 0 -> 4726 bytes gensim/test/ldamodel_python3_5.tst.eta.npy | Bin 0 -> 96 bytes .../test/ldamodel_python3_5.tst.expElogbeta.npy | Bin 0 -> 272 bytes gensim/test/ldamodel_python3_5.tst.json | 1 + gensim/test/ldamodel_python3_5.tst.state | Bin 0 -> 732 bytes 10 files changed, 2 insertions(+) create mode 100644 gensim/test/ldamodel_python2_7.tst create mode 100644 gensim/test/ldamodel_python2_7.tst.eta.npy create mode 100644 gensim/test/ldamodel_python2_7.tst.expElogbeta.npy create mode 100644 gensim/test/ldamodel_python2_7.tst.json create mode 100644 gensim/test/ldamodel_python2_7.tst.state create mode 100644 gensim/test/ldamodel_python3_5.tst create mode 100644 gensim/test/ldamodel_python3_5.tst.eta.npy create mode 100644 gensim/test/ldamodel_python3_5.tst.expElogbeta.npy create mode 100644 gensim/test/ldamodel_python3_5.tst.json create mode 100644 gensim/test/ldamodel_python3_5.tst.state diff --git a/gensim/test/ldamodel_python2_7.tst b/gensim/test/ldamodel_python2_7.tst new file mode 100644 index 0000000000000000000000000000000000000000..36df3078f80407768dc6e93dffffafb2dd4122ec GIT binary patch literal 3298 zcmZ{nS6oxs7RKo%6e*#r6hT0mXru@j2_Pjjfly?acv2vdgyf_kAd1pKq>I3yR1uY? zAbq44>7pA$XXV14j@bRt=sOu^wvbZrt2 zATvn}B9=nKLu_$?-G2`tyZQ}~<0{ChhLS*|X?QG?Mko5?(R9EcPoe-gI>_aPg7`7X zmnk$~JdF-=BiT_9G@9t+$Ew8&^LT;0YA9|x17P4mJ`@B;q*DO~mVl>${CE_E$iUM8 z29e@N2O-E4r~^I#nGB#A1R9=BppbB&fF;}!V>k5i;U0>I3ef3zIw*+5p#)feL^G*4 zR#7@AgyctwlZk#rGV78`qhJ6Gkwjz!g2I;Y@SthSQs@;FHx7>l0>J~8@F}(*3&s1d zVi6=KN`NJcK_gR7=%XyBq{*_#bQu`G{6r>A)|jf5y!F=Py@Ap zUa|~3{fIh$w~59wC^QHNRd)g<)KI*3F1m(>C-p(eUr)X_S^G!30U8YmgnaKR%Rht0 z#4;eD)Xyx4AMR%qlt!{epx7}{PN0k$lHHEV5%DVn#|Wh2A)qXZg9$^jA=z0Oa%wCu z?8f%?_BP*G>&FMl#=yk5H1V^s8PJVeQbe1lC|O;Je=t)?64^5yiLZ}3;!FeSvi!xfWR4`!Oz9m?ZQv{$yq!e#qYf-1=` zdtu{3DHM)c5~8d&BGYWNtuNQpsUbIge5s>q2Bw_YfByWv`RNDITs>?z1AY3}uju`8 zNsYc+K98{e*jy!2ItzWDQm7r7+F0D!$8~mYqcKexXQwCqvJK&QO~Q8Z$?d$C>c!d+ z-KT%{IH5<*X9=>+7#wXj?~;y&-Ry$)T2vhqYa7>USsBLHJIS8GHZib;$UKvchcjCy zaN$lo-v7RR!ddm5RSdMg&L*EDXJRUi(Cax@%e)xe)#u7hau-F zo?At(I*o_EaJ*G31lBJZ$8jpq=Q^W_$8?QHN^qLWslHoYJAZ5zeZFOLU9aw;1?il# zRa>mI#)ae@RoAghqnJBaGeyA}+2_@IszH2m>3FRxrN|58>x^2tv$XjJX&Bb<7+ACN z4#wXU_K&U?i$ z>$0Gsl(l4w)nrjzhq1l>g**CXt>%V?USdM>z>43Vk>)EEQm8e8t-kR1LCNKCVvr=s zG`{v?qO3Ig!o(kp;s>1TB^^;84?m+S0dF0ewp7?3{2roO>}vX8XnOq7n&alTYbTx> z&nkB~)}2{)y*^arWhHaO7EmZJh!$OsDSZ)c+dR;g!~IE7sEGDymh9wEBtQt#-9K)r zl&8f3xgc5bKIDG=#hTMVmzncTna_@=2G=ib#sBS^d(}%dC>x&eJmrPl5>CF{@Y5sp z!&^O=O4?J~l;L+*-}^9a(Mh&T_e=j~ep}!h&L9MA-{zLuGED@`^6-jFl*80F%z zwmZbDghuZ-yGeQ=gtGCi?Id>kW~B8Y3aPHcZZQ}J6^jR=RD1fw_($u;`DZC(F%F6W zyN%rAI=A;%2apc~943=;TT5hudm{X9*vDWVe}Gl5A?$edy8H8r#6L6`hcAELjg1?x zzq8BlIX+*dSvfh!n+(sECan2Q9om7iJ6=|CEQC zT%Bt)LXum^WLdsNcgO18<%yRN$?DBU0HNS|>)b>%tP(o9xziE!jJ%>bfRcCn3rEwF z`+eU$Gq~&6s@On{=F+NSr_xHo`pHti4C_wENg!mIr=<+Ob@xJt@))UNL`q<7cPO00 zVkk4D)M=oDK6RrmbZy4#bB>7fy~LumH6KiO;JI_wmmde@XiW!uR9i&#zZWmy=m12= zpt^2h7t5bS0K;zirf$?;6~$aHyMJa5ryi)wz8<1~q}{>ObEh(`@ep}PWH>;Kf6u!r z9pR(P-P!pqw?Ogo2T_Um)NB{nzI6>CbZ{X0$ZyOr*Rx+!_}yL`LzH}(ZZ)A!?)KT; zP3^ws2HV1r;;o~md|RbwBlO%dVi2;)zllBPBWP4~&!Qi~xHq!WY!sPUGB-ZWi>3UI z-4yuCY;tUq%_r>1qW%b%$?o0Uw5HAr(0#{c^O1$U0wXi&Y;t@8kJ5bN!R54#17Wzd zD^%UH3#Ql6{QgWXU2$ff{@nua1*wF)URFf{rwlUiHg8mL6*|YHj`MSAU(>Xbc#%%j zeiXULX5=_}<;=ddA(*opzn6i&sG4f?@U#8B9rc|8gF~huU~}74qVam4bjGYITB~PGn`jgYu@*X^U@?3 z)3(uW)V^!6=c{xiuF{eL&dilZ9>bA2yJ>S;S-mQIR`DZw$LC|*#VS9FF7nOy zj|6lk>_=%R^}kl!cxqLy>|Km_Zc^D2b~dC1QlQ{CxJv7xZqDDJcxvOfm-85q3Ch$eieFoN@4sFeF4te9!>0B0IWs>zsamsYwLs-t!La6s|UWFLL z`kOtKpqjJvR!mmclj5NO1EsPYzp0^N+h1#6XcF@v6!b4rScoz5$Dp%ps^q&Qqd|gNv zX`-$vo%_As_MZly!(x&}Mk#Ja)3-`YpO8{KqL$z+JLG#t_}C>ZO@eITV>_a z`5lyY`Gspgm*_BzX<$wj<;}yFUu*1KZhsta?sx!U0p+q`Qgqlrz>hB zBF%<%H#Kq|oQkWRjI7kgWr|6K8_){4-BM_>ytTtC5+Zh*IJbRu!?h)wn+K>j2kRU?Cj&_ZKG>Vy8J%pV>m zBPsWKLJ;PAVv5eX)3Zv6Uh-lV6mmQ!7sK{`hRSR68Twb3jXiT5+?XxfW|Wb@JI}b z4~?c{iPS(ksOvA+%^bW$+zA%Z<3LS&CPw=|H+b*o literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python2_7.tst.eta.npy b/gensim/test/ldamodel_python2_7.tst.eta.npy new file mode 100644 index 0000000000000000000000000000000000000000..a5ceb80b2d236cf16d807ef7e63979bed00ebf4b GIT binary patch literal 96 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= dXCxM+0{I$7ItqrGItsN4aKOa?1`q6EGyo3%7kmH! literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python2_7.tst.expElogbeta.npy b/gensim/test/ldamodel_python2_7.tst.expElogbeta.npy new file mode 100644 index 0000000000000000000000000000000000000000..3c5eadd9d063809dd8badc012892e8b896502374 GIT binary patch literal 272 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7ItqqHnmP)#3NXOcb17Fl=X{I(qQ|Z#=KEUgkJyXdxpS_?e${7rU70;C z_NtuEbU-v?oWgF=6`Sn$zPc>_b!Llw{=++3v$nL@KWml<>|M}e?|tDC&x)8+_7>%w zj2{zE*-LMIFbidspYSY^EY+}mg)VK_uu`uFMGP7;PE6%)P zcL>+a5xRBbzx|P#37QF-wf0|QzOl4b7TX{Col(%HP-tJ06v~`hl32)_!I6Oo6Fx|o sFt%mzbcPBrfWZTM7!B0J4h-~^{N&<7es4}t1QsV}f+A2Lwlql(0FPPCG5`Po literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python3_5.tst b/gensim/test/ldamodel_python3_5.tst new file mode 100644 index 0000000000000000000000000000000000000000..397db655bd91140196d64f9510e0c75e8b537488 GIT binary patch literal 4726 zcmZWs*H@G4m!^nF1eGEJ7K$JsMNy;(h%`k)6F3m%aB>2PBoYW;ih`o_(0lK_NiWio zwVwSC%$mPo=6Y^s=3>@d%vv)WIKOiGnRFvpc#eu~dxk9ZA zRLf<>2DM&QpwTKs$#PlB-+u%X-)h11n_%W6u{1F;QOTct)@bE|d8)*U|CShvrFw<7 zSSMT(TT4W|B9*ENOZXPKPO!)mF8fF2`aib8Vp{D@Xj zV9@GRpA=G^?2|&Rk@0D(JVA7BlI;~Kv^wFc*j#e;FPnx^Ip3%gti@L6m#WwanO>zS z(Fr!=vF0ro9{qKdJsZ5ni<(ls zs#x_!A(g31l`_GhNpgi(rBYpisD9TC`)SANlvz6P5^AzWwbb zo%^Haij@Qj4~&9$hTzj6_+|(XeZ;0>vwFeruM6e+@=`wMFEQhDAHswHv5DBUK?uwc z9{DgF&Tfko9;c?JCjBpe&cDPadLbykN$DxERRXTN3TLg?%0f4Mk$zELYHk!y;j7jO>yD>WC3BB?av{AuPH)};k{jaDRt zb4brGh?9~P6dwLGRESWzE6=kNspOEOGK3c#a!M)39IdF$VVR?9b$v#W8cswRb%%C)@as&me7OwKp4@BQ>@^W+Ah7MIojrb z{hsoa5GVGgE;jx9&171}Ci*Z^Upk=m zdsQ*n#M_Ot%JTt+Z7xuFAxVk@X*`p0VF_P*~&IaxC28Z0?EPYI50!#q38oy-P=CG~t7J8Et)xM60=*j+5SiM%BZhNb1L` z+}{(uMYNYk2N8 zfo5})IPpEGDWtt%Z1Gw%ZM#yvEmfstxtZbAITpuMi#@E1F;hV&J~VcZ6FL-4RUF<7 zI$$hhb{WyY<*Q|gpCq`WJ<2r2l{#~&9?R@#TU6O)nq^>ixLwnrjWe+-@y5DuIQ_s8 z)jLYEL(yE2Mx!)II|;8DOiS*leS`Hc)Quk6Kz9PIquW+a#>mvr_ZSG}C~D)w!}IKH zTxGOYLdOyQ)Z?W_&6`*g#^n6tJC3x9I@K3Pbqv~Z@FzN5oJ=|1F^uS$Lks4##gIGm zdh;HBmU_r&B8?;3^D@Q4AC-)W>=-O?o~9U2KWHPJcIou{6KqA&tS1@MarlB(vr)zV zE#@UU-%pmgVFR;hwB>u7n6^!*OJ+Ul;2cd_(2lze7PYiPv&9){SiGwI5=1ADUwdFW zz)bYs5!i8&noiT&p?YK+T7i!=R9f5u4N`pAH6r=e_2earLyf{AEO`7h2&(`{Oj zyJ91pcJRaZ9_>*}K6bR&O6G@swD^sd3Ju9pbS2XY-@Qgd20EpME7XS}Wf?P~&*B#C z#h_CbhiQd7t!VC*(#UUtXoyu&hbD$bzj3y@sWwc*B2uM(K|S;{hI$#0!?dn&@w0KD z?P4_F40Fb4U>qGqeB^xfGqFaQscVeTAr`tx{R?N46l!#1+lNx@FdlnpUWzpbn)1rk zlJO(8Q6qH}N1*EgYF+6#5>p?j9mDTYgQYl~i!&`=dRip)r&Stf5E-dH4eeh1u#rxv zI>+%{tPLgcBiOQUggVT!@L>RdrBs4qcvDw^_{d_RJ*M;oyzmLW`&c1&Z@ zo*~RG*Rgj$`Lbq4I|~p7=+G*c={%2Je-3(_<}A@2jR_|uZIsaDT{>_uP58LL9VF7? zXV&d5kt2t-i&I&wTQ=HWaoM>&>^N{Q+hv(Ie!VKpPZ|JGI7 zphIR*omm2Z4_9I>g9&5QaU-e5A!_*XOdgBsO*ocgr-;*bpLLLUssfVoSkNxm0~H_g1hf1`jgxZ7v!9JMymz3JbXtw4eipuC%tpjHVWOB_@3`#2$n3ns z64_7N*HJ4+|JTx>m$VRrDyh{c!=)g51Exdys@V^;aEldZEH9CD>sc6v6iGDdCCB!o zhqoSET?+i}pLmniZcr77A56|=lZbR0{Q80u6E0wq-DzjjL()IJ(VN7B#LrajN(ZhF zF;s!w>pA|8zSREBq%_Bumgyifi4}Q|*0QWng@IT%9%NX`o=}a(kc$lwclkbv(vK#n zMas>lsnUfyzJ?T42IkYzeY7SsR(bZpJPzVnRCdfi(zt*I3HPp@M?B$dQ+)*0(_sP~ z7kTi0?7Y@Vo(^2>!Z_=Z-Z z^P<%ZzqVk$au!36qTJZ`!(f*uJn4P+X8_OPCuix1f7Mg#OGlm*jP{9iV#AfG$+Y8E zxfx#?at({Q39MU79_~Cn_q#r#aZeibLcJX=V#DTL;w{WLsA$|-E-Ggb1r*Z8Cmcpn zXClkiJ{>F3_1&R>D=|gY-oK-V=X!Yv&kM))JY#k-MF%VsSfGED=`g^!}J**Rh{0zEkqJoC3%EgTC?06c#L-bPJeQPXW{@z_(PKq4Ed~A?fKYmxD+LGqJM{v@) zIsO*Ud>Q8fV}{nA-e5}ap)(Bgv?a4({BAq=q_bA`anoZ1T;sLSM25|j&MM7=8`Cu$ zaG_Tp=_9pFQS53s8B6>Q@eBJYS=te}|`e^OujTC=*4xRAe z$I2Z1L6RAZgKy|H<;2fsp_8X`tf;x^otj&t<$5XOxf6%!xins`#%y5$T0Zbl(O?o& z%)Ppf@mb68VvP#&qQQ(98Jf82TRzUTYi*8R3Ad!CyYKkbtA!nIe9UQFmgS}7W>WQ= zcRc5$HwuS)GB78Fm?lTnnaqb^rkgMK=dnGx$lKpXF5r8kpIq8@@|$j z9i?BFMxu+_JgjND$lN}T7M++EbnvGTfA(6PkRiO%3$OE2{>xNs`a1eQ|Lwn{!Zs@O LLSnulKk)wmC*~G; literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python3_5.tst.eta.npy b/gensim/test/ldamodel_python3_5.tst.eta.npy new file mode 100644 index 0000000000000000000000000000000000000000..a5ceb80b2d236cf16d807ef7e63979bed00ebf4b GIT binary patch literal 96 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= dXCxM+0{I$7ItqrGItsN4aKOa?1`q6EGyo3%7kmH! literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python3_5.tst.expElogbeta.npy b/gensim/test/ldamodel_python3_5.tst.expElogbeta.npy new file mode 100644 index 0000000000000000000000000000000000000000..d0dbf9661e42c55f88229a392182352797d1b113 GIT binary patch literal 272 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7ItqqHnmP)#3NXOce(Szzce6B( z*t2}?kvIZW_quuln9iHO`Lvt!K6|#OUxT}yw%8jrHqE-7TW;?%`Iq|=&2syuTiMpC HlI8XQ&E;>b literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python3_5.tst.json b/gensim/test/ldamodel_python3_5.tst.json new file mode 100644 index 0000000000..e19afe4ba3 --- /dev/null +++ b/gensim/test/ldamodel_python3_5.tst.json @@ -0,0 +1 @@ +{"0": "computer", "1": "interface", "2": "human", "3": "survey", "4": "system", "5": "time", "6": "response", "7": "user", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/ldamodel_python3_5.tst.state b/gensim/test/ldamodel_python3_5.tst.state new file mode 100644 index 0000000000000000000000000000000000000000..a46ade20395bc974526e9ec6bed7737026a2f4fc GIT binary patch literal 732 zcmX|<+fNfw5XQHyQo3AVSAZ9TTCA? zB0{YZjY!Y{S`uH38g3dTXkv2a57X(k@jPsPIs1LzoXpH)zdmDTn3ayEvxaH2sBLH- ziFXWb%GF#`1YvjqeDmOsD54+;s%p(-vW{sm2qfT`5~RCKK5b-m2FK-)QtxS)ZsiIL z>JkuC>ga|s%3@HjlQdU|>J$)7=WW;097ijNs$=Te40D~l?urnqs>F;@{R^TjEJ1^= z>hwmPiDsq(A~Y&KicUh4SHjj@D{~o|mmwU7mLi;pLu*9#$$q(U8QN-*hFi#yvt99% z^T9RfkOkRSgp+aTjK~2mZv?s~CnqQV((yjB;6it@lxkFZsNAjbM|1oDkNA>=zicey zHZQ8YDDl$mzV?Z`6c}u{E4g6y|^nq)Nr4_q5QAm*x2~ZaME+~BitFU@c{1P1}>53 zb1;_1=RGl8$9I&;S8xv(M)~6Kyu_baUHFNY3rxZZs Date: Tue, 4 Oct 2016 18:00:38 +0530 Subject: [PATCH 03/15] Added test for LDA Model compatibility between Python versions --- gensim/models/ldamodel.py | 3 +-- gensim/test/test_ldamodel.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index adafeaaf49..b0d5e5e701 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -1050,7 +1050,7 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, # Save the dictionary separately in json. id2word_fname = utils.smart_extension(fname, '.json') try: - with utils.smart_open(id2word_fname, 'wb') as fout: + with utils.smart_open(id2word_fname, 'w', encoding='utf-8') as fout: json.dump(id2word, fout) except Exception as e: logging.warning("failed to save id2words dictionary in %s: %s", id2word_fname, e) @@ -1072,7 +1072,6 @@ def load(cls, fname, *args, **kwargs): try: with utils.smart_open(id2word_fname, 'r') as fin: id2word = json.load(fin) - # id2word = {int(k):v for k, v in id2word.items()} if id2word is not None: result.id2word = utils.FakeDict(id2word) else: diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index a96d96ae6f..f8518c1b8d 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -366,6 +366,23 @@ def testPersistence(self): tstvec = [] self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + # #Method used to save LDA models in Python 2.7 and 3.5 environments. + # def testrandom(self): + # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python3_5.tst') + # model = self.model + # model.save(fname) + # logging.warning("LDA Model saved") + + def testModelCompatibilityWithPythonVersions(self): + fname_model_2_7 = os.path.join(os.path.dirname(__file__), 'ldamodel_python2_7.tst') + model_2_7 = self.class_.load(fname_model_2_7) + fname_model_3_5 = os.path.join(os.path.dirname(__file__), 'ldamodel_python3_5.tst') + model_3_5 = self.class_.load(fname_model_3_5) + self.assertEqual(model_2_7.num_topics, model_3_5.num_topics) + self.assertTrue(numpy.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) + tstvec = [] + self.assertTrue(numpy.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector + def testPersistenceIgnore(self): fname = testfile() model = ldamodel.LdaModel(self.corpus, num_topics=2) From 8b2cc4228d64e2a60910b1db5747c11919076b6a Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Tue, 4 Oct 2016 23:20:57 +0530 Subject: [PATCH 04/15] Modified unpickle method to allow unpickling python 2 objects in python 3 --- gensim/test/ldamodel_python2_7.tst | Bin 3298 -> 3267 bytes .../ldamodel_python2_7.tst.expElogbeta.npy | Bin 272 -> 272 bytes gensim/test/ldamodel_python2_7.tst.state | Bin 510 -> 510 bytes gensim/test/ldamodel_python3_5.tst | Bin 4726 -> 4736 bytes .../ldamodel_python3_5.tst.expElogbeta.npy | Bin 272 -> 272 bytes gensim/test/ldamodel_python3_5.tst.json | 2 +- gensim/test/ldamodel_python3_5.tst.state | Bin 732 -> 731 bytes gensim/test/test_ldamodel.py | 4 ++-- gensim/utils.py | 8 +++++--- 9 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gensim/test/ldamodel_python2_7.tst b/gensim/test/ldamodel_python2_7.tst index 36df3078f80407768dc6e93dffffafb2dd4122ec..e7b3af4d6487f0e6a0ebc84bbfcbbb85f3c1659f 100644 GIT binary patch delta 3081 zcmYM$c{tSD8vt-)8_AX-l&z6{yF<3Dmq8hW%9O-RvoRyvceade$P`jZWXWE#BvP)W zNXbr?gt3KWcZsPel%MXs&+|L~ob#UNea`#7&vX9xOv^RN-If*&A`tzBoe5av??aN7 z+IhhsA0H}_1_(G>ukrk|P{L#yk%Fa>g9535Ai@A8?u!iwz+z}53Xw_*@+SabM{746 zvVXl~2PGJcrBaDhKnOuV?GK~}VCcaFEN$;%Us9i}tFWUrJM^Ptxl|ZRfI!4!nZSNW z>#6_B>?e?^Gzu9Q#(kNc~F$50+w3l%(c@(6;nz!ViiuO}kGiDgayv-OH&aTSuvTZwK zqDT^2$vf!jP*01oZ*64iY`eO9re5g&p{mY*?s@c(OcOon>-CG{PBB(S+9gTtZ>O-?FW=j3#rMC9SUD$$y>{%2nW@BOf-TbiAM~20>=fNs&LVEH>cQm}VLtFZ7!kBTDYVg`Rp$3*=B- zydM3D{-vmBPEf*Q%a8Q>X9?Q`=7aR~CcYgO`dFZEgk){&2MMk23X0R&# z@rrI+uA49v!j>SSWt1>A0uFq)pEes;{k4PM!!V)HRM|Ew(NqTGyudqryP-9$BcMH^~P%YtfnR+?^TX>>`C%9w*~#Es^4H($w3o^f1M zKw{9zi6=n)d3M|n{j4K*rf(`ZX|;S$ zmG32`7k||*8=1Vd-RAN!!@X#X0u0t4m}cJ#@kIMw+x&XD87^)rH<;*5X0m>8Q;@io zYsVuqE1d=hZD#O-+DCTR`)BoiOCko^!8XlRrb!_6jKu?%2u0Rec-Lg^t@;uilXJ7b zu16o0*J!jHb>AY%V!3Fs-~I}^a1rt#=h$R;)zuW+fQCAWCKpD$EKll#YUuTE7b2R? zt1V6R`L>LFJ3Q7ZzO>cAwb(6(-}rc+X^&z9KY9^0F`_9JebqM$61%hVayqL{ z20QtSDtxFdW_Yv6U9DpwT$w4cb7qegKKD%A@bHCR@L3NB@zXm_JwIeowZlDeD95Wd zX7tU>bT>Yw<3>`4a(YqHA1%hiCWOAN*x8D;rhQC9k3LBq$>F~gS4%T@PcFP9%eJpR zoav`8iz-p}V*tE1l_quII>HTD{2Ql&a_ z1!6=Tu@g%P|0r?jwEjcc5(bBvl{|p$-4v|eESn3sCkT0M8n#7ir}3JV8TdZ<*Hgghn3ssev{2Y_6UlZNHDrXvh6`<1ozbWbGT? zMNx?7vDKl~q15eEpH~3CE$e*?TFdfIv}k`5ly(1U%})1;>BkhFEF8(p`@pmuosA@kSH((0S>Vk1hdM20Q ztc}%CGh$(!xoy6d%s|b|NeChkPteJvnpQ#1Kga$(+2iuqC()~twrbcDzI_oU;@76` zgq+e4f%VQRa#K5oi4L+lpq~vJXIrjd{U;^^Fv5@n^J5DnowGj_@DO7MPp$~OJB6M5 zxuUH_!{=(I_PUY;nggX?*c6!!v&MJ(=5!RkyE>VK`5%qj!A$3p`No+GHOZgY z`<~tJ4|i#9+$xyjf}7_b-enb()~~IVbt{wqdX;3Vtz^bW)w?pFL62S7hnl$4#p!8o zJacfxr=xh~D8Ku=N5-tI*!&iIRZ*ms-9US!o#vo{@UF?|i5(3P`K3jT1D`$5qbD>R z63vSMIlh|bM)qo7cA()0|mIDixBAMa)@> z@hBk-hKeT#GpT@*_wV2zs8RuCZ{UaqYCka~*xo> P1cSX12!NIg9jEs{RA;zH delta 3116 zcmYM$c{J4f{|9jUGL|f3&rZmenk=EoAj=qpF*Lc;FvCn{%zS1kq^9g7+eo;|npCzT z`^~;&PjOu$vQ^e7Thz~8zUTM;=Y3x1JkR_6eEs)mllv%_EG5MxF^GN?8Xn|?!y^7S zq^KQZfr6OiYZO`_o<;|F;habi215)8I!xl|0G|)QuZ`rTGq4OiAb49<-d zI_$up5q+5qJPr_#LyA(U3?i8ripO9{R00-|h(ij~ut7Kq8S`&#Nwxr|nmAa;4Up1C z@*_Nq%*;-i0Mcx8PR+A2euq`4A)p`}mPW&dfB;!IM--CNH^vQ+(}r^*s9aJ1C2))o zDjo#LBe|FmI0u~bFrmPH$f?LOrCYSJ#apH++1<&1q|SvoeKqomT^oA-(Kh%z1V!}r zF|IoWR`h)ATL8ys8%2BkI5Qs@_R}ds4YuHN-Ez*GneKC+@~jKvlcT$M&H1#jX6lbI)ZO-lRxX%%KzNXawn(L34Q3<+F@R^xu0Ek`u&8$o{9^{{uv?f_MuYy z+G$d7@N(D8nLC7$_g#@kETROu zKtXR8dm1(={^0tcU4-4ZVv)eDLZ9!7B_21j7%j!=X`}^i`|SR{Rs8*~<1OR*C$^;X z?)L5Rvbq;jbG1Cjv&`e}-^daNX60Yi7;A+JC}iOEub06uPHZsh6wc8W8f77VX2*fr z)lU#+@4EQ2*XPBH_*mTuv#xB$N{CpDruny+B4X0Yxg^H-(61+ZCu$-{L3@%hL2Wg4m9Z%`?bO{PPxMD z+fyB-?Oxw1+6yQ4$fp~h1DH;j6sP4!Wq&b$Eeece5`uT`v3O;+t&*|U`FPc3iV}4K zX%GV=4pOY%-t_XOO94}=mmu+DE!~Dwd=)>&0W!;$z z(~Prn9Efg}UwfFU=ClP&8F)q$P3yeU-#th+PdOH$yl;``Kah1X{E0PPUhWgmsX&b0 z%G6wRhfAU+D?Ij~1tskb6Uo81b&xpeThR`R6jFUBVkry)mPo|LX!Z6>2#(cH2+mQ) z<6KmO_nLSo4DTJR4Z@!UyG*6zwUx?+^+pBVc8>FX_61V24ny!8_YCA0OMYpzh+O%; z7oRZEaDPt_J+V-&S2Z=yp9;;9C9DTcEAE0hU9Yh;Jq3Htg!Ue7q-E*f?Kz1m=|`G^ zV5YUta`kVwFg3_dHuxC5)2)-36Y%l!$cZ-lU&`Dfd$7h~=e}}os~x$$>oy)O$Misd z$Os!tMe6X3MM?9DSgt6y8tvMDx;FU+Bwe%BgoUYi-aS7V3#kH+ZS8i3z9g^e4I-6M zf8tm)V};)iY_h_-kFQA#*6J;X5o%HG3_2KKYKHqc2+#eK zLat7%_&C@I6>+)Zc@%a8RbYjp_Gzl-`5;&yXO+^9)aTq{nW%TTpwYWk=}n5{VX={5 z3Bi5;>I_(b5pP%5ue?IlYhT2r64P=#AO{Y$SdpWHvFewY5uWFMvISA^EI?|3OjK>S zo0oG=Pjg40jp>dksAT(?mB4n{xhP{)W*kgD^^(LZ0fKI2&m86ngm*JL-BFd9Eyue3 zZ9#(lo8OkupVm|3TO0uq&zDR_c`WxH&W zZyt%jrC+BSom;fJg%P~U7Z|yYz{0?2a97enjIP?iJJrn>_7xibC9qePRjm={SgoK!71yCPS}%2L;Wowl;B>_e za#%)Cz!}D-$ky%yE&HA3QAd=}4`Z)cNKXd1Ez_O#-lNOCJlNhc`>E=*)q=OE0@_xj z?=OCpIJ1U3y;Xp=tQ01gfevAD%SXR(OLvJ&$x15jJ;68L8?;t1Jp0%y6$@!Gt|xrY zb($=`R*Ow9XvQzs+1`2;uU{I%7cl0q4E2GMrurepbD4&gD$RQk`IBK!Z|YUfzwBzL z?C3iuT@Wp4e<0G|>Ml|E+=gl_T8y@B{H&%Lm9A_!r7@-%CGn|3#M_=FQvfDT$>BgD znHc?m^{e$3L-vz9`-GI6V84UB7OhL{x>Gpg^+w4LDQWllZNEGXrr&>g&&G5x)onoM zeSM0ku2yKuGm}IG!eDEY=-aV!bKRfyi=&0{7i>l1!H#EgT=Wg$&~OhPzD0qf8r8UT zlb0cuKNmB^nyKq*=YMNpIelylP?C@?HfN*EXYQ6-JtwifV^$J^_IGfW=mPrBMVGp? zc6H&$MM{cv6atqulhJqOCgC>!{7v(zfh?rW(p1ar>M60rnqc=CRp2beWZ7P zXSU45K0A*t=%ThKC|dWu)R19819Gb=@1MN+S&Q(v_jRI)UkAflmvz$|+*d6kdU}SJ z2twW8%v9DzMO%*;ZRzGdKAliE6DKUf|4t1bow*DOA z@8u4wyOGA(TIi8H&r?Er*2FD*=Lq%AP`xX9Duh%R;CIJAvx~!uyw;u`Q8`&?q&enh zg^m00LR)T2n46X6wftJqySjHK_C&@K)(3%hZ*lP6iH|>Z6>b!+0JCl?EwFqb_K0FX z&(d1O>qE~kt7{g@9XNt)HR}=mi$SrC8Nwzd(_cFs5vBe4Jw1aLe z?O2*tgfV=PApBJ<34_0ir-cAV;UJ`-AAuPZNGFE6;{ip)5I<7zZ_^%fm`n$h;KE2g zioZV{&j6I|pzHrBmiJ$ust{xd|F2BdLuJCbq6mks^f&#_m1>8slw}_gRtMDMkOCMC z-H%8Op#vH|e-Hn7l@4h70LQeELik{+Er}A~i)UZ~EhP8fm-eB{|LH=Z68-3a4x9_g j>-)Faf#ZiV{#V`qEuwpia#UKQn1JZd_PP-d?hoSw?C9MlGrnx zw?9<9Di+wJw?Fzi^ftgvfq}xIdu~t@waLyFV-n^|iq=g+V{7r)lZDGKD`MD7%%4 z_k=&}Pu3~*ugX7PhRX9KjI%$s!yq`YM7uw+t2>-sPJllx^WV9E%YQ#prjZP#mVZAG Cm}6!D delta 200 zcmV;(05|`T0+0fbP=AclZ7Xcggg>I=MKm|Qgg?YT7TnyhDA1xfbgg+$-<}4h% zgg*gZAiEW)vOm4*(--xbggwlnWfkW%ict0}+%)y9>#y@!E-wsWlc0V@k$Jleo{y)zVH^+3y z{y%^1i5vFQ{y(F8Q?I7|us=>I+|38?1yc93z|35l^#^=s1{XZNa@v>p*|39TXfpfxmvp_!-i<9Hf`zSA{Ea^XW-#wF1%p2!hJ5y!Jb;s` F0aSdKXfFT& delta 203 zcmV;+05t#p1O5Y$>wn)xjU$p@hd=hJfeO(2g+I3T(>nN5hd&5I&IBUQg+Bnh8~`HE zg+Kj5%wX1V@IM^wizev_gFlT840KmBg+I4b>&f<#gFnGH>rZR(2SCz<{X9eV2S5au zk9ptj|37x+xZ6O{|39ky>R$@f|34B~iVs-P|37(S_Pb%!|3E*A#{auv)c-#RdS^aU z9`8RCblDd6><>Wl^NXm=>IXnqDQptj*#AGoe2^(%DSbcnT=oQpd2>I)`)F{6AaRqb F0aOFlXAl4Y diff --git a/gensim/test/ldamodel_python3_5.tst b/gensim/test/ldamodel_python3_5.tst index 397db655bd91140196d64f9510e0c75e8b537488..0875e545cc0891d6c6b6a0f684479ebf82cdf919 100644 GIT binary patch literal 4736 zcmZWtSyPj1mt{~vL_`n)1yOKd5)PmO3WAJEm_*bZ6k-wr2@>GVfCfPs!X!eNXBlJ+ z2r|eZW7XO};OpvN(0%pYe%;qy)x8r>otsV-@aD;WhQ0P$Yd@skPF||0(5lp7>dIn8 zxi+l4SZaN8$}E;s$yPJqNrAA41$x$R_wup5{X73*K4$@YK25AtyYv*O8FRP3I%8D zDWv69N~v&G&Kp!2!sL9&Ftxs1r;=(kQiGFaZe@j5r_sxGPJ+wtD5r|z-=7NSL^jQW zt6U=Iq~%&CMTOe)+i`qJjBrV0BeHE3F6Rk8fg*eBYH@;Zc6N5g|MJWFOJt)HuE^Sz=f!S3 z=l6@I68?S={FJ=o)jZ*vPVkpmLAGL8S*4O|g#eMA*pXu;5^LUC;d-HPBT#J5LkgW# zxG6qkd3s-!T3%Tyvy9wQ{1va2tEvoIA+S&gvSMCYrBkW73KC0HaJzM)WuxD<&CzFi`_X;t*TI|d4VcTrA#VQm8*0HAuK(?So5Fh zs~0|45v`JHwF<2eE-JR>rKpyc|5fxqL{8R-%90YTLMKF|C;W?-_#FQ&m8#WJiB73eXqA=a#loHRgj`wHzyFu{f2=8Mq!q=L zYRPZ@-u;U?zXzQpl056D&n)DY$aR$(C*fXDo)8^qWp!45L~LwSv=F0oQCi6pt7Jyp z&l4UnBeJTP5^>7kEYRN(;(xQ?VV;n{EKvUc`X^fJzr*m-Li`DuLPJ>qt=+4>hbH=h z5gMUke8m=~sUwM6pG99$I?@WJE*7NWK!>JOYM};0DDCIu7hv!eb<(dJw@$poFEr3z znF=jrq(QGt>PQPh!>f2-cYMLjNvssnDyH&I__@+J4$Y;R_$IK?C#ytEl&(1~O575vJ3zM=f{twBz~``!L$jK6SswG-qwfOT=J}Cz{Y^ zON+sYXvQSI(IHJ{r$)%BgNramqf}4R1?b2rqQ(l$VGyg%JUm20&);J14kkihR;g>z z_9q>`!DPRZ-;WC&A9iG<$Soos9Re ziJxT7lVU+l?J3ksjo5>c`eWj8oL!_xJJ!f_lk*zmq*gE+iHTG#HvKBlht?xM5mLii^8+V}XNNSJ4 zm$&aYtEQW@i#3LGDvq}C*`J|YzEaIH>+t8w)>CUT#vQNHFQ)FkYYuwSJZK}6mayPV zGd_&V?Bn=aY`nn=_PnsbI@oygkoJ?Qk0xPs$Y&LO#kM9CyWXXD(EG^?gU_*^b;FZM z*P!QZzSBg03eD3Tb;V#Z@P88(>L@g$$nHF!e5vFtWnWuETrY`+?bev0JZ4dY{qDy7fsb#4!dSL0} zA2fi)EE>tN5V#$Co(YWOz+D?mzr<#p18cvz2!}F%+M$h#Li+ZAi)$_l#PIVwDQFY( zveb|99A?BMMqZb^a;Jq{EEnSIdzw;JFl~D=pF!&pQku}wtOKijhc+Y1s5KIciK#Yq zS1?;clO=H&q)u#n!Vb+pWEi_JgRjh@PTMNl3ROkH>`J3=s5{Fi6JISl8N*maO>LHI zUYP-{eysT&eQt=$$wF4u;71(MH#8_|Fqifox?L=LX2&CIv||6qbqov7O0b*3E_8%h zceWqu2rYHseJ38!BrS*6unR7qK?kj4mv>oZKO5#GCiz{f>KgUXHqE88{kDi`=;meG z*P<1T7qA_tOgdAbrj<%Y*~~$v1cxHe%jgtvfMsU;$}1N07za;M+lSkHzRn9U`oPEp z_!W-vYv{F%`DO%j&37^ALnbALu}D8du)?TKGROC+HJhd}l8*%%(+FI-gY!Jeh|%{2 zMP=LqcIf9jGNzo1@#fD=nJU94`b8V)&yulswA}y5`)_8jQ4S>1to&5SH9DqsKJHw2 zDa>`$Pi+o~WW-hsdTeiTOERC27hQRgLLIz2@6U;tWd<5zXXs^_cZEO46gAvsLe=A2 z9D3sTX@PU^O+{l7tKRI+MVz=pleOH|n$e%6X2|zlsVTb}?Orxa^%-pY<1=l&sw3kA zY{y53+tYp~{q&)FOeSAuw{1Dc4BX@DZN#zmS5uhw)A(jceGa3*xtT=RLDO0E$5A(X zT_F#2&x=_WLw9wdl+VG?)mzS|vVypQsata1G|BJ0d_;fapW1qupFSC z@ve`E#?}oQL8JR!1&t)Tpud);T`%Q@W6ld^UdzDf_EEo-`_axRY|{?Yb6)*6!!5!) zAP%b`WtegZqa__RpAHIQW2}GW>+RxF1XC~_>*+i9s-Zd>N^rvl?bYgCs0Y(_S!`Bm zk!bSgq|JQlUTS)3!&TXN9TCaLYJu@#66>Zb+aOgW&^WcYYXodCo3_cUQ*n2kIu&Ig z6IOz`J`;gV%MGkYCP-gkGVGM@Oi3aaubtaK4~<7inDiDgH`WdkDj}!-VL3%!>Q2vB9`Z`MQv=ESona!MCEBc1D&# zy#rRzOxsZ`-1R5aasT=A+NT)7L4a><5Y4Kw@Ya*o&N4OUbKcV|`W!CAo#faye7%W5 zH!YuY2wNxUi#`2NM-*Yn(y@lvfEKV^gnm2Pcu@D@9_xHj%COII^Xa%Fzs_uT&mc=0i6Kfy8@Vao4gKleo8GbYY3 z27}Rqwp&-;qW%=?>nJ^h<^?R`^Zf!l^goMv!_mtbANejj1M{W360Y8HuE5eZQ)_-Q zeS4qE^l8AFD6Wjnz1u+EQ3CYpUj>v5002m%$I3tHb#b_>RR6@ zY?iZrn@-!&ZY~{>>15zj+VF{lF{#|YnpRa@+Ex1IMh6NG>*TU_u$Oh~(M_G)(3-i* zQ+!KUz;wU|j6J=GF6w{5sMNFbZWi%1f09;CQqNN{_H#Xa|kFonFKjJ$T`^h#ML5);*w6()1WDEe{@G+2RYNA?7^!6h2G%!ON`gDUQHv$UFV4O_LYG*?51 zAK6XKPs%SaCwmf}Wdw7Bp5Xizy!k#e=!aQu92+$Lm~E#I-GzEO^0-sVvUH>-pT`!> zGadRaQ%4YE-o2PBoYW;ih`o_(0lK_NiWio zwVwSC%$mPo=6Y^s=3>@d%vv)WIKOiGnRFvpc#eu~dxk9ZA zRLf<>2DM&QpwTKs$#PlB-+u%X-)h11n_%W6u{1F;QOTct)@bE|d8)*U|CShvrFw<7 zSSMT(TT4W|B9*ENOZXPKPO!)mF8fF2`aib8Vp{D@Xj zV9@GRpA=G^?2|&Rk@0D(JVA7BlI;~Kv^wFc*j#e;FPnx^Ip3%gti@L6m#WwanO>zS z(Fr!=vF0ro9{qKdJsZ5ni<(ls zs#x_!A(g31l`_GhNpgi(rBYpisD9TC`)SANlvz6P5^AzWwbb zo%^Haij@Qj4~&9$hTzj6_+|(XeZ;0>vwFeruM6e+@=`wMFEQhDAHswHv5DBUK?uwc z9{DgF&Tfko9;c?JCjBpe&cDPadLbykN$DxERRXTN3TLg?%0f4Mk$zELYHk!y;j7jO>yD>WC3BB?av{AuPH)};k{jaDRt zb4brGh?9~P6dwLGRESWzE6=kNspOEOGK3c#a!M)39IdF$VVR?9b$v#W8cswRb%%C)@as&me7OwKp4@BQ>@^W+Ah7MIojrb z{hsoa5GVGgE;jx9&171}Ci*Z^Upk=m zdsQ*n#M_Ot%JTt+Z7xuFAxVk@X*`p0VF_P*~&IaxC28Z0?EPYI50!#q38oy-P=CG~t7J8Et)xM60=*j+5SiM%BZhNb1L` z+}{(uMYNYk2N8 zfo5})IPpEGDWtt%Z1Gw%ZM#yvEmfstxtZbAITpuMi#@E1F;hV&J~VcZ6FL-4RUF<7 zI$$hhb{WyY<*Q|gpCq`WJ<2r2l{#~&9?R@#TU6O)nq^>ixLwnrjWe+-@y5DuIQ_s8 z)jLYEL(yE2Mx!)II|;8DOiS*leS`Hc)Quk6Kz9PIquW+a#>mvr_ZSG}C~D)w!}IKH zTxGOYLdOyQ)Z?W_&6`*g#^n6tJC3x9I@K3Pbqv~Z@FzN5oJ=|1F^uS$Lks4##gIGm zdh;HBmU_r&B8?;3^D@Q4AC-)W>=-O?o~9U2KWHPJcIou{6KqA&tS1@MarlB(vr)zV zE#@UU-%pmgVFR;hwB>u7n6^!*OJ+Ul;2cd_(2lze7PYiPv&9){SiGwI5=1ADUwdFW zz)bYs5!i8&noiT&p?YK+T7i!=R9f5u4N`pAH6r=e_2earLyf{AEO`7h2&(`{Oj zyJ91pcJRaZ9_>*}K6bR&O6G@swD^sd3Ju9pbS2XY-@Qgd20EpME7XS}Wf?P~&*B#C z#h_CbhiQd7t!VC*(#UUtXoyu&hbD$bzj3y@sWwc*B2uM(K|S;{hI$#0!?dn&@w0KD z?P4_F40Fb4U>qGqeB^xfGqFaQscVeTAr`tx{R?N46l!#1+lNx@FdlnpUWzpbn)1rk zlJO(8Q6qH}N1*EgYF+6#5>p?j9mDTYgQYl~i!&`=dRip)r&Stf5E-dH4eeh1u#rxv zI>+%{tPLgcBiOQUggVT!@L>RdrBs4qcvDw^_{d_RJ*M;oyzmLW`&c1&Z@ zo*~RG*Rgj$`Lbq4I|~p7=+G*c={%2Je-3(_<}A@2jR_|uZIsaDT{>_uP58LL9VF7? zXV&d5kt2t-i&I&wTQ=HWaoM>&>^N{Q+hv(Ie!VKpPZ|JGI7 zphIR*omm2Z4_9I>g9&5QaU-e5A!_*XOdgBsO*ocgr-;*bpLLLUssfVoSkNxm0~H_g1hf1`jgxZ7v!9JMymz3JbXtw4eipuC%tpjHVWOB_@3`#2$n3ns z64_7N*HJ4+|JTx>m$VRrDyh{c!=)g51Exdys@V^;aEldZEH9CD>sc6v6iGDdCCB!o zhqoSET?+i}pLmniZcr77A56|=lZbR0{Q80u6E0wq-DzjjL()IJ(VN7B#LrajN(ZhF zF;s!w>pA|8zSREBq%_Bumgyifi4}Q|*0QWng@IT%9%NX`o=}a(kc$lwclkbv(vK#n zMas>lsnUfyzJ?T42IkYzeY7SsR(bZpJPzVnRCdfi(zt*I3HPp@M?B$dQ+)*0(_sP~ z7kTi0?7Y@Vo(^2>!Z_=Z-Z z^P<%ZzqVk$au!36qTJZ`!(f*uJn4P+X8_OPCuix1f7Mg#OGlm*jP{9iV#AfG$+Y8E zxfx#?at({Q39MU79_~Cn_q#r#aZeibLcJX=V#DTL;w{WLsA$|-E-Ggb1r*Z8Cmcpn zXClkiJ{>F3_1&R>D=|gY-oK-V=X!Yv&kM))JY#k-MF%VsSfGED=`g^!}J**Rh{0zEkqJoC3%EgTC?06c#L-bPJeQPXW{@z_(PKq4Ed~A?fKYmxD+LGqJM{v@) zIsO*Ud>Q8fV}{nA-e5}ap)(Bgv?a4({BAq=q_bA`anoZ1T;sLSM25|j&MM7=8`Cu$ zaG_Tp=_9pFQS53s8B6>Q@eBJYS=te}|`e^OujTC=*4xRAe z$I2Z1L6RAZgKy|H<;2fsp_8X`tf;x^otj&t<$5XOxf6%!xins`#%y5$T0Zbl(O?o& z%)Ppf@mb68VvP#&qQQ(98Jf82TRzUTYi*8R3Ad!CyYKkbtA!nIe9UQFmgS}7W>WQ= zcRc5$HwuS)GB78Fm?lTnnaqb^rkgMK=dnGx$lKpXF5r8kpIq8@@|$j z9i?BFMxu+_JgjND$lN}T7M++EbnvGTfA(6PkRiO%3$OE2{>xNs`a1eQ|Lwn{!Zs@O LLSnulKk)wmC*~G; diff --git a/gensim/test/ldamodel_python3_5.tst.expElogbeta.npy b/gensim/test/ldamodel_python3_5.tst.expElogbeta.npy index d0dbf9661e42c55f88229a392182352797d1b113..2cc222219aa005ea32e55f634f91db4430444869 100644 GIT binary patch delta 200 zcmV;(05|`T0+0fbP=8W~c^p{&dq1aoPi+?wd_SvalzYY}i$7^5N5dLcfyFUllH-R!nyFYP|vUGy%xIaw;YC~MJ%0WNL`Mvn>CWSwbliY#a zfU`eDs+_{n?1Vo(M1lJ6CWSxv4LILANxMIwK!|<2QGh?7hh?Xa(SJXu62$}Rv=vs|*E#lHXII<4g~r|c754k&*}JY^qLn98iM z_MrWO?RMS0_FL@3&3D9Ku-#%GaZ~sCZ6N(nc2T-=>=H-&t^20k&C)nx&+@fL;s{XT z>*@($I&c2w({9fD?Ae}v4eoZ@VsF&gH0yS5xxLHeU+zmZ%k7(PWm~ICmfHgWUItwu diff --git a/gensim/test/ldamodel_python3_5.tst.json b/gensim/test/ldamodel_python3_5.tst.json index e19afe4ba3..e690d19cad 100644 --- a/gensim/test/ldamodel_python3_5.tst.json +++ b/gensim/test/ldamodel_python3_5.tst.json @@ -1 +1 @@ -{"0": "computer", "1": "interface", "2": "human", "3": "survey", "4": "system", "5": "time", "6": "response", "7": "user", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file +{"0": "human", "1": "interface", "2": "computer", "3": "system", "4": "response", "5": "user", "6": "survey", "7": "time", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/ldamodel_python3_5.tst.state b/gensim/test/ldamodel_python3_5.tst.state index a46ade20395bc974526e9ec6bed7737026a2f4fc..17ecb24a2780bdc0ba2716b8cc64315ae6ac367a 100644 GIT binary patch literal 731 zcmX|7|4UO*6n~rL^rg-$O*3ni)+|~?L5rd*L_|Fsu@O9kHQ(NQy2#`i zbA2HrqOgS!hJ>>+{6T|r{;;}xlkSCkKb+6`e9o6?uR5+9RyrHX<}^KHg)*ArB5_z# zVzy%IA_#*s;CTYxpxh(~LLy-}*~x+h4RP4B8?n@MEwVQbjk1sD@C+@dTHuqKWzp57 z$Bmq+YZf%cp;_*9W2UY;rj^d?2}{ZAnViCDF%J9W22Sg?0xharDx`90DC=bGv|^e{ zK}>LE&aiCLQEd@g>sg|q)mdnhggNl536)QzTB2^$fC%lfhx;aBzud_GjAEya3(!%7 z15r3wg3c&(1tpKu^g-NdLmc3jt7KR>4kUS+g9EIMX)ZqGEgCmhhg z%+dnwFnmTOEYlZS)oF{-HqM`oaldypsyzH)H;dkL-4$9r=defTxp|e}if;&KZRsMd z(;A26DSYX7h?iy|nu!G_Xq7{%;^L@qiN10?MjQ8Og(}^&%>og6Lp$^_?;oNi{EZcS z8oZ4^SRo93UHE}E*~sWETAaYQxWNLKd-1tT-=%3Pc5&%9?&rfIy%O5mu*~eoxPU+D fg&$`be#fmF;i;QA&ERxxvB4+|+HfZ6Bt!oIi0~e! literal 732 zcmX|<+fNfw5XQHyQo3AVSAZ9TTCA? zB0{YZjY!Y{S`uH38g3dTXkv2a57X(k@jPsPIs1LzoXpH)zdmDTn3ayEvxaH2sBLH- ziFXWb%GF#`1YvjqeDmOsD54+;s%p(-vW{sm2qfT`5~RCKK5b-m2FK-)QtxS)ZsiIL z>JkuC>ga|s%3@HjlQdU|>J$)7=WW;097ijNs$=Te40D~l?urnqs>F;@{R^TjEJ1^= z>hwmPiDsq(A~Y&KicUh4SHjj@D{~o|mmwU7mLi;pLu*9#$$q(U8QN-*hFi#yvt99% z^T9RfkOkRSgp+aTjK~2mZv?s~CnqQV((yjB;6it@lxkFZsNAjbM|1oDkNA>=zicey zHZQ8YDDl$mzV?Z`6c}u{E4g6y|^nq)Nr4_q5QAm*x2~ZaME+~BitFU@c{1P1}>53 zb1;_1=RGl8$9I&;S8xv(M)~6Kyu_baUHFNY3rxZZs (3,0): + return _pickle.load(f, encoding='latin1') + else: + return _pickle.loads(f.read()) def revdict(d): """ From c4c1289bd0c24f9d5becb411ad74fa46c601a14e Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Wed, 5 Oct 2016 20:59:53 +0530 Subject: [PATCH 05/15] Created and saved LDAModels with same random_seed in both Python 2.7 and 3.5 --- gensim/models/ldamodel.py | 2 +- gensim/test/ldamodel_python2_7.tst | Bin 3267 -> 0 bytes .../test/ldamodel_python2_7.tst.expElogbeta.npy | Bin 272 -> 0 bytes gensim/test/ldamodel_python2_7.tst.state | Bin 510 -> 0 bytes gensim/test/ldamodel_python3_5.tst | Bin 4736 -> 0 bytes .../test/ldamodel_python3_5.tst.expElogbeta.npy | Bin 272 -> 0 bytes gensim/test/ldamodel_python3_5.tst.json | 1 - gensim/test/ldamodel_python3_5.tst.state | Bin 731 -> 0 bytes gensim/test/ldamodel_python_2_7 | Bin 0 -> 3267 bytes ...7.tst.eta.npy => ldamodel_python_2_7.eta.npy} | Bin gensim/test/ldamodel_python_2_7.expElogbeta.npy | Bin 0 -> 272 bytes ...thon2_7.tst.json => ldamodel_python_2_7.json} | 0 gensim/test/ldamodel_python_2_7.state | Bin 0 -> 510 bytes gensim/test/ldamodel_python_3_5 | Bin 0 -> 4700 bytes ...5.tst.eta.npy => ldamodel_python_3_5.eta.npy} | Bin gensim/test/ldamodel_python_3_5.expElogbeta.npy | Bin 0 -> 272 bytes gensim/test/ldamodel_python_3_5.json | 1 + gensim/test/ldamodel_python_3_5.state | Bin 0 -> 737 bytes gensim/test/test_ldamodel.py | 12 +++++++----- 19 files changed, 9 insertions(+), 7 deletions(-) delete mode 100644 gensim/test/ldamodel_python2_7.tst delete mode 100644 gensim/test/ldamodel_python2_7.tst.expElogbeta.npy delete mode 100644 gensim/test/ldamodel_python2_7.tst.state delete mode 100644 gensim/test/ldamodel_python3_5.tst delete mode 100644 gensim/test/ldamodel_python3_5.tst.expElogbeta.npy delete mode 100644 gensim/test/ldamodel_python3_5.tst.json delete mode 100644 gensim/test/ldamodel_python3_5.tst.state create mode 100644 gensim/test/ldamodel_python_2_7 rename gensim/test/{ldamodel_python2_7.tst.eta.npy => ldamodel_python_2_7.eta.npy} (100%) create mode 100644 gensim/test/ldamodel_python_2_7.expElogbeta.npy rename gensim/test/{ldamodel_python2_7.tst.json => ldamodel_python_2_7.json} (100%) create mode 100644 gensim/test/ldamodel_python_2_7.state create mode 100644 gensim/test/ldamodel_python_3_5 rename gensim/test/{ldamodel_python3_5.tst.eta.npy => ldamodel_python_3_5.eta.npy} (100%) create mode 100644 gensim/test/ldamodel_python_3_5.expElogbeta.npy create mode 100644 gensim/test/ldamodel_python_3_5.json create mode 100644 gensim/test/ldamodel_python_3_5.state diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index b0d5e5e701..184d6cba82 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -1030,7 +1030,7 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, if (isinstance(self.eta, six.string_types) and self.eta == 'auto') or len(self.eta.shape) != 1: separately_explicit.append('eta') # Merge separately_explicit with separately. - if separately is not None and separately: + if separately: if isinstance(separately, six.string_types): separately = [separately] separately = [e for e in separately if e] # make sure None and '' are not in the list diff --git a/gensim/test/ldamodel_python2_7.tst b/gensim/test/ldamodel_python2_7.tst deleted file mode 100644 index e7b3af4d6487f0e6a0ebc84bbfcbbb85f3c1659f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3267 zcmZ{nc|6oz`^W8rY#Bn?BKvj^2}Ru2G6u<%#7wh%nUOKRvt?wBLMn+Y*-Ms0%55o9 zvXdoYY$4g*#8ec@GcCX0^Vf6zajtW&>vLY$xz6kS@jekeIA0)uN+ju%$auh?s_&1- z{F(_n;W3DxAz_ePD-z@h1$i}*(r7dVz|kpGA_G8EF$}<;jKNbuK5wJ|*~ftz)t^hg-O%KFlXqEm5rI6H|0wV@W*{I8N+Wsb z2jN_BZjNG)CY%?^ZSL&s?6||RUjv+rhK+N7Da6G!XTdia>8HASh;b z<1a<6EPI+|nCMGity>s#j5=zpQ=HiLdJ>cM{H@JqT;H3}<(qs4jipmOdZ{!UlFd{mME<_CP>)?@nrV7GGiYKAE4d37cVd5Ow%c-CS}-;g+68_l)V>y?+-<7Sl-EE5&WJ+EzbIp^;8 z31>SPt}}0RGfcq(BBBLx8^8EdS*}A6#Ng=UkcFvUre^?V~EOuRBB=~0p zs?i@V>$T>%3adb)B>&Jt3-c{rd{1>`}h4yQ-G;gL>#e@Z6&ttAlwy zD>rSVJv7GkKeP5Pg&u?2QV-Aw7W>2XUhcWRs&A<_)C5bKlDmdQ_&{#ow&-YkYTX@r zC+{ffv50ffZ1ktv5AClToG3S`8T$){?yx>As-W()tSbX;9}Y`ry2qiRXErWS>+gWJ zhF-vYhjqP}RyA=cx9*IToaL&`{hfSS1mMIPgRC32n_fGA;YOqS*Yb*7^YyDLtxJlG z$#|8!FI#Og1(m;*ZArK!+Do&Y77Il_$Un+IW)bq_T7`b&sAwTrf=Q|%)84rn0K=kmj zZb3G7q{&jP+EH6$FTFWiQy`<_Aysn5T3-39p_#I@^6O>vb1*;1^w@QL)6x zxn6|4Q$($Ba>G*r2=Wi1B+r9t^9WiX;rWZXT7~<9Cv$es!7muLEh| zNuL_W=k@u}F5gaFUr6=%k%=XP;LVa<@oP1sW!q_eIW?`s{cqN19J*TIEcJa`)R;?B zRptciU7n<6a^J#wk0k^pv+p`sSJJk8HZCJ3r|qKN)OD$7`RLeh2ATVBOtJV2^x_jX$1stf`Jr#W?$P?vvW#jXOCR=e+xq$>HA~XCOI~jpp zzH&G$qr!1uz-k&NptFBxt#8J_w>Y%F4Pw<)X`0Bbk-o6o0Hs2|;z4rbt(+(L8e(Ql(M~Nnz_f}ocPw2|b&F-i? z9_Z*Vs_?F+h~Z7_Y#9w=%S?)$HhVDtv3vZQhp)<6pS2KfK6?9!r+dvSx7o+`W_#5{ zjl7zk>cSN=e4f@%iUO*|wGY zvt1g9Ptvm7@#f#Nen673(4+MHuIpV@W*Rkm^9mk8vUQ%`7pMCuw!99hJuszBGeN=S zvGg^}s5O=CF}r7pP&@pE!4fijG;da*d)xYPp-iN0AFpSPu0g$wR?Bw8=u4PMrP}Z% zs4*~XBbFTUUShAC!F}0c28)@Q)Q{=e5UAQHoh6-jl!VEtnwRPL^wnofd9ne!L5WvQn~Qb$;oe6M*w zl3=I@%HLAc=1Ne>JnSQ(7Y}XBV@hW}#vjD#O>EvpUj)?b=3@IzZ8JMzER*huCEN9t z5i_75_K0OW0EgGjpqf@f&pgHaJJId@&?mvGg0^DR9kO)}CgRts;fR>j z6oK{3D6&)9hX8w7UGDGo>!+G8Vf@D@NN8c`?zz!YAT>Us;q`KwK zdp;VYojo^WTPx_Cm1ig7p2~O27WQad?6Ck;vO_hfcwl_Aoo_xpeXG`$&--NlP>Ob9 ztcK$G2!XaA4mz`7(bdDAJjo|b&ux`BhP}O4yw@20ZpRgjT4;5*nF_L-YAA=B`&HGo zJUgYe)v9Eg6vHiHRyy%dO}BJaft5R!I&F$gWrQdgRrqH$3h{h({n5j(d4er(7;WvZ z!dM})%BCc@fvm37`1d#IH@jM!(PGDxgFY9R2 za5-8jJuU?PrU0pDR)uCm;bS|!v$_gjT^vm=`tOm>7-C~$ww14JY#vq=5g%Y(S)9uV z$?b20yIs5!wy5nn-^{z(IbrkndI_N_5^gD$u5Zv%zLb!TaUrX7w=bur0X{vYZOu^~ z+d9wN$xM+%eP`M}FtofU=}B^%#;a zWyXc2&OgG9Uzm}tV4mGTqWW5)*H-t?ybkUr=edql8N{ut+FcD>!# zskMG1f07Mpn!A5Hzoc$;wX{o__|J<(QynEUUaJ13eocDJybyByRwt{wso~_FWuNw< z9&y2}ywo?;D9JT3h3$xK0FcjV~89BDC$xVE2UTx27$MxV54U0hWz_}e> z%(26IbS~J_ut;Hmf$>KJ41mG}W#Ga{Asm4oaGuHmS5Oww3qcC~L}LzBlc=B^Tm&i1 zSzaJh&I1%GxEIcis`=ed9rGXF68@KeuZCH~;_u diff --git a/gensim/test/ldamodel_python2_7.tst.expElogbeta.npy b/gensim/test/ldamodel_python2_7.tst.expElogbeta.npy deleted file mode 100644 index 64e0220059c945418618d9521909abd9bcb68f7d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 272 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7ItqqHnmP)#3NXNB%l=Xoi4;r=FuU?+{ zau1M}*VsL^>wAm+8~+OS=P2^cRt==90e8zS0 diff --git a/gensim/test/ldamodel_python2_7.tst.state b/gensim/test/ldamodel_python2_7.tst.state deleted file mode 100644 index d9598a13cf863d41c5137668628c64b3ae797824..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 510 zcmZo*N={GBE6&W-%gs+o%_-K)Nl65=xO`F)gG&-iQn?BlH5&_=Y73b)LOJ5&^Gb6I zDvJwQVnapa%?9K5T#V7vRpBFSgUIL`+-*!rWyZq07adq(e zWq&u=`z0va6^Hz^Kek)wT9`?xeQj0HYUW4m4ztb+ocZ;J-QgRX@R_#z|Lteg?Ge#` z{@>oJ{@C+#dVlR@6h3ZBeEHvgsZC??p^DA+B9HreHUnuMjYRi(2R7NOq_mzAs^E9n zGXLJep4H#%o!@G%JpJmw{j;XlwIP3c?U~XH-W#(v+bg!!e4A<0U|*6H%A8t~Sjd~f zk%0&kK1i4_wq@{ih6*r%!2^334b;O94D^)zGVfCfPs!X!eNXBlJ+ z2r|eZW7XO};OpvN(0%pYe%;qy)x8r>otsV-@aD;WhQ0P$Yd@skPF||0(5lp7>dIn8 zxi+l4SZaN8$}E;s$yPJqNrAA41$x$R_wup5{X73*K4$@YK25AtyYv*O8FRP3I%8D zDWv69N~v&G&Kp!2!sL9&Ftxs1r;=(kQiGFaZe@j5r_sxGPJ+wtD5r|z-=7NSL^jQW zt6U=Iq~%&CMTOe)+i`qJjBrV0BeHE3F6Rk8fg*eBYH@;Zc6N5g|MJWFOJt)HuE^Sz=f!S3 z=l6@I68?S={FJ=o)jZ*vPVkpmLAGL8S*4O|g#eMA*pXu;5^LUC;d-HPBT#J5LkgW# zxG6qkd3s-!T3%Tyvy9wQ{1va2tEvoIA+S&gvSMCYrBkW73KC0HaJzM)WuxD<&CzFi`_X;t*TI|d4VcTrA#VQm8*0HAuK(?So5Fh zs~0|45v`JHwF<2eE-JR>rKpyc|5fxqL{8R-%90YTLMKF|C;W?-_#FQ&m8#WJiB73eXqA=a#loHRgj`wHzyFu{f2=8Mq!q=L zYRPZ@-u;U?zXzQpl056D&n)DY$aR$(C*fXDo)8^qWp!45L~LwSv=F0oQCi6pt7Jyp z&l4UnBeJTP5^>7kEYRN(;(xQ?VV;n{EKvUc`X^fJzr*m-Li`DuLPJ>qt=+4>hbH=h z5gMUke8m=~sUwM6pG99$I?@WJE*7NWK!>JOYM};0DDCIu7hv!eb<(dJw@$poFEr3z znF=jrq(QGt>PQPh!>f2-cYMLjNvssnDyH&I__@+J4$Y;R_$IK?C#ytEl&(1~O575vJ3zM=f{twBz~``!L$jK6SswG-qwfOT=J}Cz{Y^ zON+sYXvQSI(IHJ{r$)%BgNramqf}4R1?b2rqQ(l$VGyg%JUm20&);J14kkihR;g>z z_9q>`!DPRZ-;WC&A9iG<$Soos9Re ziJxT7lVU+l?J3ksjo5>c`eWj8oL!_xJJ!f_lk*zmq*gE+iHTG#HvKBlht?xM5mLii^8+V}XNNSJ4 zm$&aYtEQW@i#3LGDvq}C*`J|YzEaIH>+t8w)>CUT#vQNHFQ)FkYYuwSJZK}6mayPV zGd_&V?Bn=aY`nn=_PnsbI@oygkoJ?Qk0xPs$Y&LO#kM9CyWXXD(EG^?gU_*^b;FZM z*P!QZzSBg03eD3Tb;V#Z@P88(>L@g$$nHF!e5vFtWnWuETrY`+?bev0JZ4dY{qDy7fsb#4!dSL0} zA2fi)EE>tN5V#$Co(YWOz+D?mzr<#p18cvz2!}F%+M$h#Li+ZAi)$_l#PIVwDQFY( zveb|99A?BMMqZb^a;Jq{EEnSIdzw;JFl~D=pF!&pQku}wtOKijhc+Y1s5KIciK#Yq zS1?;clO=H&q)u#n!Vb+pWEi_JgRjh@PTMNl3ROkH>`J3=s5{Fi6JISl8N*maO>LHI zUYP-{eysT&eQt=$$wF4u;71(MH#8_|Fqifox?L=LX2&CIv||6qbqov7O0b*3E_8%h zceWqu2rYHseJ38!BrS*6unR7qK?kj4mv>oZKO5#GCiz{f>KgUXHqE88{kDi`=;meG z*P<1T7qA_tOgdAbrj<%Y*~~$v1cxHe%jgtvfMsU;$}1N07za;M+lSkHzRn9U`oPEp z_!W-vYv{F%`DO%j&37^ALnbALu}D8du)?TKGROC+HJhd}l8*%%(+FI-gY!Jeh|%{2 zMP=LqcIf9jGNzo1@#fD=nJU94`b8V)&yulswA}y5`)_8jQ4S>1to&5SH9DqsKJHw2 zDa>`$Pi+o~WW-hsdTeiTOERC27hQRgLLIz2@6U;tWd<5zXXs^_cZEO46gAvsLe=A2 z9D3sTX@PU^O+{l7tKRI+MVz=pleOH|n$e%6X2|zlsVTb}?Orxa^%-pY<1=l&sw3kA zY{y53+tYp~{q&)FOeSAuw{1Dc4BX@DZN#zmS5uhw)A(jceGa3*xtT=RLDO0E$5A(X zT_F#2&x=_WLw9wdl+VG?)mzS|vVypQsata1G|BJ0d_;fapW1qupFSC z@ve`E#?}oQL8JR!1&t)Tpud);T`%Q@W6ld^UdzDf_EEo-`_axRY|{?Yb6)*6!!5!) zAP%b`WtegZqa__RpAHIQW2}GW>+RxF1XC~_>*+i9s-Zd>N^rvl?bYgCs0Y(_S!`Bm zk!bSgq|JQlUTS)3!&TXN9TCaLYJu@#66>Zb+aOgW&^WcYYXodCo3_cUQ*n2kIu&Ig z6IOz`J`;gV%MGkYCP-gkGVGM@Oi3aaubtaK4~<7inDiDgH`WdkDj}!-VL3%!>Q2vB9`Z`MQv=ESona!MCEBc1D&# zy#rRzOxsZ`-1R5aasT=A+NT)7L4a><5Y4Kw@Ya*o&N4OUbKcV|`W!CAo#faye7%W5 zH!YuY2wNxUi#`2NM-*Yn(y@lvfEKV^gnm2Pcu@D@9_xHj%COII^Xa%Fzs_uT&mc=0i6Kfy8@Vao4gKleo8GbYY3 z27}Rqwp&-;qW%=?>nJ^h<^?R`^Zf!l^goMv!_mtbANejj1M{W360Y8HuE5eZQ)_-Q zeS4qE^l8AFD6Wjnz1u+EQ3CYpUj>v5002m%$I3tHb#b_>RR6@ zY?iZrn@-!&ZY~{>>15zj+VF{lF{#|YnpRa@+Ex1IMh6NG>*TU_u$Oh~(M_G)(3-i* zQ+!KUz;wU|j6J=GF6w{5sMNFbZWi%1f09;CQqNN{_H#Xa|kFonFKjJ$T`^h#ML5);*w6()1WDEe{@G+2RYNA?7^!6h2G%!ON`gDUQHv$UFV4O_LYG*?51 zAK6XKPs%SaCwmf}Wdw7Bp5Xizy!k#e=!aQu92+$Lm~E#I-GzEO^0-sVvUH>-pT`!> zGadRaQ%4YE-ozxL}R_>hvkq$JD#+Gu7M=ONBMr zCv`V*zn!(+-g?J^X?OZJ+s9~^KS*D^-G0U?2e5cx;Dpw^Pu2DY;mNv73=i3teGNYK z!Say3Jf|$5QPvLo`8jP;ydJyl*{_>78oBMZFPyNcr0LBLdtc@(=a|i>>`(sO`{TV@ zt9}3EJB@c5Hru#`i zbA2HrqOgS!hJ>>+{6T|r{;;}xlkSCkKb+6`e9o6?uR5+9RyrHX<}^KHg)*ArB5_z# zVzy%IA_#*s;CTYxpxh(~LLy-}*~x+h4RP4B8?n@MEwVQbjk1sD@C+@dTHuqKWzp57 z$Bmq+YZf%cp;_*9W2UY;rj^d?2}{ZAnViCDF%J9W22Sg?0xharDx`90DC=bGv|^e{ zK}>LE&aiCLQEd@g>sg|q)mdnhggNl536)QzTB2^$fC%lfhx;aBzud_GjAEya3(!%7 z15r3wg3c&(1tpKu^g-NdLmc3jt7KR>4kUS+g9EIMX)ZqGEgCmhhg z%+dnwFnmTOEYlZS)oF{-HqM`oaldypsyzH)H;dkL-4$9r=defTxp|e}if;&KZRsMd z(;A26DSYX7h?iy|nu!G_Xq7{%;^L@qiN10?MjQ8Og(}^&%>og6Lp$^_?;oNi{EZcS z8oZ4^SRo93UHE}E*~sWETAaYQxWNLKd-1tT-=%3Pc5&%9?&rfIy%O5mu*~eoxPU+D fg&$`be#fmF;i;QA&ERxxvB4+|+HfZ6Bt!oIi0~e! diff --git a/gensim/test/ldamodel_python_2_7 b/gensim/test/ldamodel_python_2_7 new file mode 100644 index 0000000000000000000000000000000000000000..f2ee3d6f09b20936db0484970bc49c348d3f92a6 GIT binary patch literal 3267 zcmZ{nc|6oz`^SwM%hAuIF zL}f{CiWV8P;Fb`#y(DFcdZy*~d;WT^KhAZ|b$!n3I@kH*yxu2Km>5c?GQmirNE(Su zVH#0L_#ZQb3kmP~J%oT@dM6>_1ZW!yP{iRFWFniv1f$3}CO(Qxq2WnPXnPPKLJJ9D zl35Vk!FEaLhXDwXMP}exAdSj|M6f1+Oej7w5|3krG04m?8ifRjI@o#;T>C$iZvrB8 zJd;UgLSk4FAVFnE;@ET&o+Su~W5ogaNRSFf3W9V7jesYB6p+P%5DvDFeg+XZoac|M*Y_lj$f7Y2kc^%uB#Qz>UA>JD9yG;3a=*3^X$-PaB%8tl z@eBr@gZO?!f|gkfHj#yZcKlQ!sHC4+NFFPc0>B7qo{$0x3v;Clr~FcoSR6VT0Vx8) zY&2E~3lk`mP}pq%%-YS(&H0W^ysty7J0r9d9wE|R zHc;cg5*_elrczI9C+O|36rZ&J0CPnDiG|2H#rR9wcMpr&$_|k^0C1~VZs@sPSo?>K-D_3JAP8~7fni$n{84eXw6JOq|RP}uxIQUoMgbJqa zw}aI6Px0qKsoA-Wcfj1yd*f-=QT$HQ%h*mtP_zN@40YyiYthZhqRDLvy7A8nj~RU} zIjxbc>G=iAOVCk4)9E_WS1xm}=Jl)uijTY!y>EUv>!J3Z(v5*8s$tZd2J*9k)K3l? z#aZp)vhd5ux$p#ri0Wp6j*v5Ha4A;J=uLN(h8AnT*I--qMYlJ56Vd{6X#e!s zuBa=A)FxTHQqpt=PnfHy8*@hx(70!p(L zhvj{DIl3nMWoxdRJt_bfz~0HIM~|zrd>J!B1=TONb3VsPK2;gfT}E|QT}B=))qyAM zw>`OLepqTGsAwVI)NQ-m3FPv`P(KcM;m>b&=@WM~@XB_RbZS)pf?BU=@Ib!J(4fRE zg|-~KUpsRK%F8&E_d57CZu@lT(xbR)`4@NnmSTpW0V^@zO!#eTLfm zwCOH>@AN*nGdp?4Xv5!aWUK}WT2IRR`ddjaFN)CjMRV_R!GJE@U&E)ud=rd?kD0XY zqa@#{(Ly02l@!)E1=}9f-N~SamtK3<;axte7IC^anmh#L)s-j2MrWnh8LgsY8;9`8 z>eTLef=$~5X4@?5Z^Jo*+WUs~@xQ+eQoSHHm36v7oK%e$jhQfi8}xopDQc+;y*8T6 zyKb{lda1C*rTm`fh-UkI_H3x;)t3lu5 zNt8amL_X(Jd)%pMdt$`MMTTK`?q|KK1GwHPOR{FLv6SfNI7Hi{O|y%Y6${s%NnN2l zQ*?hxgx`&AlDuAIHg7Oe7LppcAO-uF5fzyikKL7Kl8wmkZbhoqdW{pM_Y@!ZNbbTa z-Li3ULG3TEiw~Hu@9%uxP__QI)Fs(y%eobr4yQ3$e9BVUL&WZ>ObvXcFHX!yNzw}CR~~;3jWPc)Q0kNvI98j+fU7>UNJ;h)uNJfFL=eVRBOfK~kswHh zdwEEfJ!~Ik0T>xK&HEqH12-hBWTdSRd#d-(y6N1u&oL=HidZ4?f9HL>QazhlBJ+9= zt9|=&6~IY;ZdEL?L)%1I@zOEITX&xPLZ3BhuiJ@BIL>q1IXUFNDZHh*OJk08#qQ6m>;HghJw?}ds68<+7s~8;@9-oYTa?x5!rT@R=x&gX z$$@Rk_$vt>!Su|81=4f$tqJ{wS8e>$ar5M(w9C7ta-}a4Za%00LN*hh$~Bn8iH;&Y z$ogYWX|H8n2Yc26>r_%!KvSQ5cMiWD=Dkt|pO0Re3Rv8419Hc;r)PB9vTB4;X4ueL zyJKzaS-xRe`Wd;n3bCR3SG6i0-4^1|`S7Yv*i-d32LqHorsHF5Q3Y9%=dOR{3eU%- z6C6=IT0GkPAvb4)xi8&gU*F-KDkD4eosCa>7UaQ~I{Z_v2EVS@^4WNZ_*(hAq!Asv zB3q3t^fc{Om%WopVXvkJ8pUL{T>G+GrxUab+H}H8T?R@90$U$kLZ7B47Z(n9>EgkG zPg81_fcII`HgD(SO6BeM#miqvE~=OJ#UPxuxOufx?U3>W&gGly4LPB%2@@}H@<2gv z3T$IUD^PEjU*old`!^NDHZwk386?;YvO>%{u3bnQIOoW3kRfVh=?!7?YHmSKcSgtR z2$}CqvJ#W#oWPh^?&rg&c{lE$#r-;+9{2khKuPxWLknsV6S=j8M|K@5MQm04rkPC& zO4TqfQY~)t3-uOF&&~1n;%5wNsvUMs%bY5Yf?2+}nz5vcC9<<9)TC^WV1Ek z;(qIOd8$eyGTEnHcgy#-8Fh?v1^KeY)v`V5p1xwRc2m{Ro>+@CDA4GB)|x|e|O*S&7L_V!F%F)~W%l%u>$#KLXQ zbKKD~mZc3!r%O5+n*;0T?H(Kw(U{B3i~d7+Oh!r8R@aU{tk6&+oC|MY5nRn-SeSz? zpJ=+VBwxrP00?pvo`NGskr^CF8H)hKiD7JN1XBR6kcul`6cGQ8#saF2WI{-+B!Cbs zqiKu?GJ^@JVqpOMC*7)r?FjoXW~vJ?6Dyn&CIHJ2i(g==@xQ}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python2_7.tst.json b/gensim/test/ldamodel_python_2_7.json similarity index 100% rename from gensim/test/ldamodel_python2_7.tst.json rename to gensim/test/ldamodel_python_2_7.json diff --git a/gensim/test/ldamodel_python_2_7.state b/gensim/test/ldamodel_python_2_7.state new file mode 100644 index 0000000000000000000000000000000000000000..fcc60724d1d10eed0b41b3810f7dc98d8d63c4aa GIT binary patch literal 510 zcmZo*N={GBE6&W-%gs+o%_-K)Nl65=xO`F)gG&-iQn?BlH5&_=Y73b)LOJ5&^Gb6I zDvJwQVnapaAKz|YaOq2|?)g^xInfcXt`vN*=W&%|WaDbKzv`ng z10?=jn(IKCajSjf+TAPJCO6y9lx7ai{>bidlx2yY^EY;fJ!fNr@4x$R|88cX2iKMV z_U%_!d=0<&-~QTj&jSuXI$F$SX(CYkn1{c1nB05&y$}EW{Q8F9A#MHnB_Q#~U+rg} zdByIqv}4Vun>YU3bDcf$GgPzIKBWBH_C=M&_HUhy7Ozn#v@b~tWlk+gEac7L$UuY% vA0$i|+cJ1MLj@SX;DJ4i2I^r426{?!95h$8Cy3SwwdG%6TF5+KsDlhEZolt}MEn$!SF3B5NN zW3E4N#{C8N{)2PB-f_n`X93>x<>mwI{p_c#XU)0heD(ju*@Mk!oO z`MdR5Q)zjLRHxLGXa!5Ly+p*FxTUg8Q>4^r1uL<+#EP5LGObpr6|BXI|LY;um8n&7 ztzaXzkl1sNGPO=s!V^oCI++*OGMkIRn{+{9*L^A^E9L+}U`UpSlX zvEZ4Sn)>RW{5$(2Hq!}Sd2NM`lFK~i{6|yq|Gp5s3wcGK48d0?_~o4iQkI{uRqBLm ziShq9?eHxowM^q-8|14Z&y|=+)9l9 z&)+khVxnS*c(0r<;kG#Z4Bk`V%GC;)j)CzP+nxcVtdJE;l@&@&wGbc{No;teRC<=5CofcLgnMV`Yjqr&a9?>w4~?u;QC1>7 zZwvd)i1S8~RGM-2H1l+TRIV%2h=lN<3?U*=V$A`iWQ9F^7!fT*7TOh_ks_*)k$#XN zL^IMUYKA(d@EqOpNa5i*y0IC;BSyFI|Ks}j3|9lEU-y9a6`Ayn7NTn_RWs@ExW^oi!B&||g=hvu|}9ed~dZ#-~J?Md8kk)~)F4d~9G zF|;Xs>7W*~w1*!syj4-XzZ~o6GNZN6MOM_nPYhUjPg|8AsDtLQ6y$eZmG%DW7c}0x zucfhI>WHyIuj?0#Uc>%V>lYZrcQ-PX%VPx$xY9n>u$x>#GaU3!iy}J2i5RPSJ{XMk zPAH&ZG-=RA({5OfwnNjia7>1WViS{^L>@3lMmnTbcbKS;cF6FM8c|P1_h_vD`LLAs zY5NO$RSuk)MLM9BY+8JdUo?%D#}%Ajy@Cd7NH5B*43VR&l!M>LF~+cgC5+MRg)0GH zs2d}%L+KYL(CLG2-wZkwW4_McJyNaqcf=IFV-nLaP#10G*wPpcSyTOkr|6*OYqwzJ zMGZJ8y;DJx*j7?E-!;>Aq7TNoN5f}lOMLgB#m{vbD^EJYu)o^G+bNoxGq3efysQdyM_AjUX3BNlwI&Rf=21*hqsL2o(RkNF6glTi0v>3YVdmI zMPD&e7l>7?iEv0wFUm2Tz;s!`_jpWP=l$=}24`!Ww#h)pkNCC)2h@in%U7{jsk@9; zj%mRYoI{Mlkax^u8hrMQ=43^5f)0Gs z($MRGE3z;w(3S*aDM{HdqT7{5T_iVNra=FWA*KB&YQwOKOe*T3*$Qf-c9_s1(a^jZ zhPiSEFILk|jXia`(^vp)1acvn7`W#!`dilBi$??PJ$ULsW4X*469ywvFJAiO!HF?) zYWk(w^zA+M;xL)|Y*Ruq(Vc3agdJ}>3}o&vVmsY~x!>kfTuW;OIC@R>SY;-52{54T z2{UW_BX*@1>1P40QClsR6VOM8<-wS&q-KU;6AR{;$l;8vQ=b?8l3|?I9){h$bM5Xm zb@Dq}38M8hEr&cG7J28dIHyN6oaaX!oRL22$1#@xhOE?gE1Adpm{fhB7AZ9{v`vMQ zx-g5^;W)lq%u2C`6~91H$<-^gSB>Ed%&m29Yb*WC37w%8YRcjnC%lT#Nj;TH8p-DH z*RfDnQ^`2AVm*|0BWtijBQof{OR1HX^5Sqp#^?*Qprh}1XfcHMU4JLx%U?8@>xpkR zG{+-5v2Clsvdp@Qj1OogmsYu|0yz^ck+eq>9N0YO;+Q2yjA8^w$ymTImMvdupe1zE zLO}*L9B2qfk!3cd_oWFdGzD;dA9^KV=WnbBcczad~1~lEIiR3y~!!8VSJ|?Pau@cEs z&)cyKv}06>-nUqNlZ1nds4rq+9n#$SjDFe)g6Y1LnKu`!^!*s-U`;!uTGv3TIOTx6FPo{h6w8#90fD6mZ|9pjk<|l zFv)tR$7CG!WZbO6oQj%oR29Vr_bApG(@``ViQ~UjCsg5t&29d^8TJ#Y|E0HMyk{ow ztI2`G+T`k)ji7lN$NUFs7TFZjbi9gID{?X*)+^?=q{YK%!}%%LgPl@^VqCw0fLrRv|>V%g>q+d>{q zdzMgJ-7UL3`_Gx__Ka;a(`rJO9qp9rOZBvl)*L#pd*tN!_-TeWr*9$EAG=(i;TU-? zqAfY~{>Acj!ZyL?jCM>o(fBPLHC&_(n6j%Ghk-{Jx4V2T+|3oEuQfE2hItoaGkdJK zo%#u*(rVU-dal+vhG~iao6Ko|rNe=3q*shSjQFA+E9!Et#nH!Jm<@8r02jtFE!q}f z?mZ4%F!Mf;dXsP{W8eFR#wZ$g%Y7$e3jc}-vfy3lvuMkKHsa8Fzvv1pa4(1T6B8ah z$pY8ku~pGWd)9-YM>Is835>@w2E&s7sN?!PAX7Sq71Wbkp3doM<3BMG$Vzza-LpNbXPz|nn08*TsLXnC7PZnLeg^6haJEq|l~2PwN3EAGAre)4f= zAlikF0xhfA1054+?IHssw|zd!aV>T6;bQKx7j-SgSzPBn(DJ=F z6=!Yw8lMN}{kb|juwoGyB`(0~i`O_QKPAc?w%}HDU8=>R4JUt-<63o3r`8x-KG}U{ ziPW1@r#DU#Ljr@NSv@vuc%O?(SInmcb4^TP{%tbpRW#=MoK0(jxv+O>d=k>}Of3pX-(fG*5vHDYVce7le>zMXE&a}D57I5{^^TuMJ}3s!Z5~6!^yjXENe!TG04i~ zb=zpv*XwBtmsmR&`$C0zEZ5`K%do$6=!t&Ed);GiX~eHQ%zsRZW4$mWr!hsxS*x1e zvE#v7N(}*YEWvJ6DNR*l%7afEM=7yk?G-Z-87RQxbW`_)4 zF9x+XT>i8xya?kJwpl`2)0oc9*SNS>$Yd$InnEZ3Sh8aw9S@DX5+2Rxfgb9*nn3NB z*~44oFqZG*Ow*Sx-m=Cx==ua(<(6e2b^NDZ%CTqZm6{`~)lA#hmENIH)>|6LyOKmTwU(FxD;e!tLb3J#t@!t#od-f`R{>|X`SAqnwd3j#w FzW|A_4}Smv literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python3_5.tst.eta.npy b/gensim/test/ldamodel_python_3_5.eta.npy similarity index 100% rename from gensim/test/ldamodel_python3_5.tst.eta.npy rename to gensim/test/ldamodel_python_3_5.eta.npy diff --git a/gensim/test/ldamodel_python_3_5.expElogbeta.npy b/gensim/test/ldamodel_python_3_5.expElogbeta.npy new file mode 100644 index 0000000000000000000000000000000000000000..1971e44b14889570af360270f96a8f047331e0b4 GIT binary patch literal 272 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7ItqqHnmP)#3NXMmW%;eE=KEUgrEcx$xqGg~{}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt literal 0 HcmV?d00001 diff --git a/gensim/test/ldamodel_python_3_5.json b/gensim/test/ldamodel_python_3_5.json new file mode 100644 index 0000000000..f64397704b --- /dev/null +++ b/gensim/test/ldamodel_python_3_5.json @@ -0,0 +1 @@ +{"0": "interface", "1": "human", "2": "computer", "3": "response", "4": "system", "5": "user", "6": "time", "7": "survey", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/ldamodel_python_3_5.state b/gensim/test/ldamodel_python_3_5.state new file mode 100644 index 0000000000000000000000000000000000000000..8f995aeab80414d530ba493550d5396c9879f6a4 GIT binary patch literal 737 zcmX|9TTc@~6yCPb!dOKSP(%UmMPd`>#RrmYf+pw^ObX~G*fiVSg*LX^p3WB22NH;( z#YAFMY9N@P0znBEH5LQjl5>8TPPdKoF!Rlv@7%sKGj4s-w0I&FOr;Go$%9Ela}XOc zv=LjgO$JiWEV!n@9Z)@zB$>7b9^J~NGWnpM&YHngE@>yUY*x#&Xx7xz7PqrG-DXf% zWiiXBTA^N%=D@2*b&*Qv%(N))Mm*Q4j z1vpjgRfI})6ai@q*|;bOHfe*rGVn=6%d2_f1W`>04Obck~@ z?)PJHN<#r$suF0k zgVD@HUxho1JNN@XdGWO%;c|y(>@j`ug=nr0*MkGJ?+f8iT6i=e)&Kwi literal 0 HcmV?d00001 diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 982e5fab38..4be73428a1 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -362,21 +362,23 @@ def testPersistence(self): model.save(fname) model2 = self.class_.load(fname) self.assertEqual(model.num_topics, model2.num_topics) + print ("model expElogbeta: %s ", type(model.expElogbeta)) self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) tstvec = [] self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - # # Method used to save LDA models in Python 2.7 and 3.5 environments. + # Method used to save LDA models in Python 2.7 and 3.5 environments. # def testSaveModelsForPythonVersion(self): - # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python3_5.tst') - # model = self.model + # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python_2_7') + # corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + # model = ldamodel.LdaModel(corpus, id2word=dictionary, num_topics=2, passes=100, random_state = 1000007) # model.save(fname) # logging.warning("LDA Model saved") def testModelCompatibilityWithPythonVersions(self): - fname_model_2_7 = os.path.join(os.path.dirname(__file__), 'ldamodel_python2_7.tst') + fname_model_2_7 = os.path.join(os.path.dirname(__file__), 'ldamodel_python_2_7') model_2_7 = self.class_.load(fname_model_2_7) - fname_model_3_5 = os.path.join(os.path.dirname(__file__), 'ldamodel_python3_5.tst') + fname_model_3_5 = os.path.join(os.path.dirname(__file__), 'ldamodel_python_3_5') model_3_5 = self.class_.load(fname_model_3_5) self.assertEqual(model_2_7.num_topics, model_3_5.num_topics) self.assertTrue(numpy.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) From 8fb383c396dc887ae77f2eca2801a055ba80f9f7 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Wed, 7 Dec 2016 23:06:22 +0530 Subject: [PATCH 06/15] * Fixed PEP8 fixes. * Moved LDA model test data files to test_data/ folder. * Saving id2word dictionary in binary format. --- gensim/models/ldamodel.py | 6 +++--- gensim/test/test_data/ldamodel_python_2_7 | Bin 0 -> 3267 bytes gensim/test/test_data/ldamodel_python_2_7.bin | 1 + .../test/test_data/ldamodel_python_2_7.eta.npy | Bin 0 -> 96 bytes .../ldamodel_python_2_7.expElogbeta.npy | Bin 0 -> 272 bytes gensim/test/test_data/ldamodel_python_2_7.state | Bin 0 -> 510 bytes gensim/test/test_data/ldamodel_python_3_5 | Bin 0 -> 4700 bytes gensim/test/test_data/ldamodel_python_3_5.bin | 1 + .../test/test_data/ldamodel_python_3_5.eta.npy | Bin 0 -> 96 bytes .../ldamodel_python_3_5.expElogbeta.npy | Bin 0 -> 272 bytes gensim/test/test_data/ldamodel_python_3_5.state | Bin 0 -> 737 bytes gensim/test/test_ldamodel.py | 8 ++++---- gensim/utils.py | 2 +- 13 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 gensim/test/test_data/ldamodel_python_2_7 create mode 100644 gensim/test/test_data/ldamodel_python_2_7.bin create mode 100644 gensim/test/test_data/ldamodel_python_2_7.eta.npy create mode 100644 gensim/test/test_data/ldamodel_python_2_7.expElogbeta.npy create mode 100644 gensim/test/test_data/ldamodel_python_2_7.state create mode 100644 gensim/test/test_data/ldamodel_python_3_5 create mode 100644 gensim/test/test_data/ldamodel_python_3_5.bin create mode 100644 gensim/test/test_data/ldamodel_python_3_5.eta.npy create mode 100644 gensim/test/test_data/ldamodel_python_3_5.expElogbeta.npy create mode 100644 gensim/test/test_data/ldamodel_python_3_5.state diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 184d6cba82..e587428e0f 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -1042,13 +1042,13 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, # If id2word is not already in ignore, then saving it separately in json. id2word = None if self.id2word is not None and 'id2word' not in ignore: - id2word = dict((k,v) for k,v in self.id2word.iteritems()) + id2word = dict((k,v) for k, v in self.id2word.iteritems()) self.id2word = None # remove the dictionary from model super(LdaModel, self).save(fname, ignore=ignore, separately = separately, *args, **kwargs) self.id2word = id2word # restore the dictionary. # Save the dictionary separately in json. - id2word_fname = utils.smart_extension(fname, '.json') + id2word_fname = utils.smart_extension(fname, '.bin') try: with utils.smart_open(id2word_fname, 'w', encoding='utf-8') as fout: json.dump(id2word, fout) @@ -1068,7 +1068,7 @@ def load(cls, fname, *args, **kwargs): kwargs['mmap'] = kwargs.get('mmap', None) result = super(LdaModel, cls).load(fname, *args, **kwargs) # Load the separately stored id2word dictionary saved in json. - id2word_fname = utils.smart_extension(fname, '.json') + id2word_fname = utils.smart_extension(fname, '.bin') try: with utils.smart_open(id2word_fname, 'r') as fin: id2word = json.load(fin) diff --git a/gensim/test/test_data/ldamodel_python_2_7 b/gensim/test/test_data/ldamodel_python_2_7 new file mode 100644 index 0000000000000000000000000000000000000000..f2ee3d6f09b20936db0484970bc49c348d3f92a6 GIT binary patch literal 3267 zcmZ{nc|6oz`^SwM%hAuIF zL}f{CiWV8P;Fb`#y(DFcdZy*~d;WT^KhAZ|b$!n3I@kH*yxu2Km>5c?GQmirNE(Su zVH#0L_#ZQb3kmP~J%oT@dM6>_1ZW!yP{iRFWFniv1f$3}CO(Qxq2WnPXnPPKLJJ9D zl35Vk!FEaLhXDwXMP}exAdSj|M6f1+Oej7w5|3krG04m?8ifRjI@o#;T>C$iZvrB8 zJd;UgLSk4FAVFnE;@ET&o+Su~W5ogaNRSFf3W9V7jesYB6p+P%5DvDFeg+XZoac|M*Y_lj$f7Y2kc^%uB#Qz>UA>JD9yG;3a=*3^X$-PaB%8tl z@eBr@gZO?!f|gkfHj#yZcKlQ!sHC4+NFFPc0>B7qo{$0x3v;Clr~FcoSR6VT0Vx8) zY&2E~3lk`mP}pq%%-YS(&H0W^ysty7J0r9d9wE|R zHc;cg5*_elrczI9C+O|36rZ&J0CPnDiG|2H#rR9wcMpr&$_|k^0C1~VZs@sPSo?>K-D_3JAP8~7fni$n{84eXw6JOq|RP}uxIQUoMgbJqa zw}aI6Px0qKsoA-Wcfj1yd*f-=QT$HQ%h*mtP_zN@40YyiYthZhqRDLvy7A8nj~RU} zIjxbc>G=iAOVCk4)9E_WS1xm}=Jl)uijTY!y>EUv>!J3Z(v5*8s$tZd2J*9k)K3l? z#aZp)vhd5ux$p#ri0Wp6j*v5Ha4A;J=uLN(h8AnT*I--qMYlJ56Vd{6X#e!s zuBa=A)FxTHQqpt=PnfHy8*@hx(70!p(L zhvj{DIl3nMWoxdRJt_bfz~0HIM~|zrd>J!B1=TONb3VsPK2;gfT}E|QT}B=))qyAM zw>`OLepqTGsAwVI)NQ-m3FPv`P(KcM;m>b&=@WM~@XB_RbZS)pf?BU=@Ib!J(4fRE zg|-~KUpsRK%F8&E_d57CZu@lT(xbR)`4@NnmSTpW0V^@zO!#eTLfm zwCOH>@AN*nGdp?4Xv5!aWUK}WT2IRR`ddjaFN)CjMRV_R!GJE@U&E)ud=rd?kD0XY zqa@#{(Ly02l@!)E1=}9f-N~SamtK3<;axte7IC^anmh#L)s-j2MrWnh8LgsY8;9`8 z>eTLef=$~5X4@?5Z^Jo*+WUs~@xQ+eQoSHHm36v7oK%e$jhQfi8}xopDQc+;y*8T6 zyKb{lda1C*rTm`fh-UkI_H3x;)t3lu5 zNt8amL_X(Jd)%pMdt$`MMTTK`?q|KK1GwHPOR{FLv6SfNI7Hi{O|y%Y6${s%NnN2l zQ*?hxgx`&AlDuAIHg7Oe7LppcAO-uF5fzyikKL7Kl8wmkZbhoqdW{pM_Y@!ZNbbTa z-Li3ULG3TEiw~Hu@9%uxP__QI)Fs(y%eobr4yQ3$e9BVUL&WZ>ObvXcFHX!yNzw}CR~~;3jWPc)Q0kNvI98j+fU7>UNJ;h)uNJfFL=eVRBOfK~kswHh zdwEEfJ!~Ik0T>xK&HEqH12-hBWTdSRd#d-(y6N1u&oL=HidZ4?f9HL>QazhlBJ+9= zt9|=&6~IY;ZdEL?L)%1I@zOEITX&xPLZ3BhuiJ@BIL>q1IXUFNDZHh*OJk08#qQ6m>;HghJw?}ds68<+7s~8;@9-oYTa?x5!rT@R=x&gX z$$@Rk_$vt>!Su|81=4f$tqJ{wS8e>$ar5M(w9C7ta-}a4Za%00LN*hh$~Bn8iH;&Y z$ogYWX|H8n2Yc26>r_%!KvSQ5cMiWD=Dkt|pO0Re3Rv8419Hc;r)PB9vTB4;X4ueL zyJKzaS-xRe`Wd;n3bCR3SG6i0-4^1|`S7Yv*i-d32LqHorsHF5Q3Y9%=dOR{3eU%- z6C6=IT0GkPAvb4)xi8&gU*F-KDkD4eosCa>7UaQ~I{Z_v2EVS@^4WNZ_*(hAq!Asv zB3q3t^fc{Om%WopVXvkJ8pUL{T>G+GrxUab+H}H8T?R@90$U$kLZ7B47Z(n9>EgkG zPg81_fcII`HgD(SO6BeM#miqvE~=OJ#UPxuxOufx?U3>W&gGly4LPB%2@@}H@<2gv z3T$IUD^PEjU*old`!^NDHZwk386?;YvO>%{u3bnQIOoW3kRfVh=?!7?YHmSKcSgtR z2$}CqvJ#W#oWPh^?&rg&c{lE$#r-;+9{2khKuPxWLknsV6S=j8M|K@5MQm04rkPC& zO4TqfQY~)t3-uOF&&~1n;%5wNsvUMs%bY5Yf?2+}nz5vcC9<<9)TC^WV1Ek z;(qIOd8$eyGTEnHcgy#-8Fh?v1^KeY)v`V5p1xwRc2m{Ro>+@CDA4GB)|x|e|O*S&7L_V!F%F)~W%l%u>$#KLXQ zbKKD~mZc3!r%O5+n*;0T?H(Kw(U{B3i~d7+Oh!r8R@aU{tk6&+oC|MY5nRn-SeSz? zpJ=+VBwxrP00?pvo`NGskr^CF8H)hKiD7JN1XBR6kcul`6cGQ8#saF2WI{-+B!Cbs zqiKu?GJ^@JVqpOMC*7)r?FjoXW~vJ?6Dyn&CIHJ2i(g==@xQ}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt literal 0 HcmV?d00001 diff --git a/gensim/test/test_data/ldamodel_python_2_7.state b/gensim/test/test_data/ldamodel_python_2_7.state new file mode 100644 index 0000000000000000000000000000000000000000..fcc60724d1d10eed0b41b3810f7dc98d8d63c4aa GIT binary patch literal 510 zcmZo*N={GBE6&W-%gs+o%_-K)Nl65=xO`F)gG&-iQn?BlH5&_=Y73b)LOJ5&^Gb6I zDvJwQVnapaAKz|YaOq2|?)g^xInfcXt`vN*=W&%|WaDbKzv`ng z10?=jn(IKCajSjf+TAPJCO6y9lx7ai{>bidlx2yY^EY;fJ!fNr@4x$R|88cX2iKMV z_U%_!d=0<&-~QTj&jSuXI$F$SX(CYkn1{c1nB05&y$}EW{Q8F9A#MHnB_Q#~U+rg} zdByIqv}4Vun>YU3bDcf$GgPzIKBWBH_C=M&_HUhy7Ozn#v@b~tWlk+gEac7L$UuY% vA0$i|+cJ1MLj@SX;DJ4i2I^r426{?={Z8bB$b z_aYHdHy@I`>s{q(bI!F^qm8^sS*}%;MV3`4l%?9p zQiaU&Pn52ZWt=sM1go$n!MZ`P36*$SSy@S?8l}8aqg7QYrCM2)va~{`&b1AinU{!;0b8ktU2QLYv2#L*HbZc?bUI*qEJQm0f1_6CVFH)&+$ zii$F+RwvUb1qXR~WtqA*^1M+bmF8I9zB+p;mFp@rBEc~%M-YWd?0INLZq)tzG4X=a zU!&v|8f9czWvNaj(`aP1qBEO$b)BYCt`iB)=eq%1%C zBNE&s);xEC;4ZP_-%|FVJW9CKDqPMHJlcdSIf7@X*g8XP^F_G&*Nh5Xt(pgVNo;sP zWvt*Wwh~*n2|hW3Z>ac!Wx9ugUuI_Ji+}RR@+G#?3H}9!VmFD=)yhhDj}8>C=qoM8 z<&6Sp;3=Nd@5x)#6$x&)Ly=}g$b_sf*&Q=fdL2jE}<1#q+`#Z zvKZ`PiPp2wL5ErI(2XJ5Y`_ZrAU!rb(ydi5GwApuEl8oqTn&z_X&bxF?sxz2!3lMw zala*+rWG`yCx^yiPz2IpJ?3a1-(h^CqQ+o3HqdQF>z_*OsEOYgvHFg-tKU;6Enqn; z=(;NR-PO-%zH?Vg<OggU9yIFobVjWUiDa3K;aH1FT~&y^3bp=pQyE zbc9ne)(QeJl<1#QL?dX?z(6xzSc!K+%adqKMMq)_Q<_vBFi$2rqBU=rsh@Vqc%Pcl zNG9)iZ2W$|oDOK`Gx}669GN9Lq}A87^b|j725k?kIJ|lV4b_oelwTboM|U|Je}EH= zV-w35r@0GPLOxRuMqftKPfVgK06l>@bR@|)X?(*JW?-al+Rk&NaT>O# z#(R&^Nv+pz!o-Ujaaewskbc~T;s}~9y zq4heyNSdN{m;-`zcV5v*ebK{)5Lp$b3en*e8Nj}pB53(_(Zd)TqaW|za0d59SSj>C zr}GEwM7dCt-xELjg3*RhtYKY*BWih8iIEg8msNaA#^iPOe~&gfS`)NGMml-Gk8L=l zejMArNW^NxWwf(Ri@uo17Nb40mQ1I@f3HF|p*ewuuLizQeMBGErv&}!9OwCgN(XUdz=(>>D(a=V zDl$+9%;=P8Xu%32jGUp1wX|F3OkLhI9zvU;3?wrr?kP;cwhecZ(M0?A9=p(ZK39zy zLot~bFMagkz?e8R1Jc*@^&R!$D4qHpGa_E0C(}6%yZ&?(%C*0QoopYjeM3NLJ*^kv z_$4)BjVrNRfDwjATv-zzuqVApKZgd}oT8d!5 z8*in2{*#9CeeudxNz<(r|bYjU`O1!8a>kE7)Xo{Q~o%_FmmH5+WG zDueN}_y*dd(IaB^Z^wy`AJ$&cjNJ7mCjQ8Vp4!A$v7MBj9(soZw(EaIR;c9>jd_VZFvWbP$5aya=KNBFc@?$bxF(JT?m?nEX5wfr7AJqJO{u{ti`&9o zD;%WKz;l1sWZzfpSBndqwZ-U}i=hRYz`}cK6*-jBOtOmBs&N>6o4(c*q+=q4Mp;ZS z3-gOoCf&_6HtHMg>r&5HK?cSNEjbop{v8fIF#9f)`qFSDW8M3T<~SPh%6}{368;$zX2V|Sb7|X!Hj~hP zx8w>la37oX1Cu^H$s*(L#IEFnGxNdl0~)5T6wb#AhN9AcYhZjHk~teA3hK?T%;s=T zspVMbSnFdGsK*iCpR2Lu%e$FIn!BgGj>-5xu!3DINN8KkYJEV*;Veu8td06QWiT}* zmD8>r8F|^JtCbgUZ0*m3JLtqd>G~(8#OB8_QMvBXmr|MP*FJJxbtE{}qK!FpkOg|f ziA%tUHvbp3SoUT-bw*P!MpJ0Xg%_Bjeibvak*)gz1MU3gYWpjV{>rkx#*ZhtwDN%t zU8JmD%(w?W_`%zq!FUfk4z;ai4RlSR{pTE+86_PHZVdf?HqwY0+VP;Sa`x%4qRP?T z-#R(TY9mrnOW~YP*=*J4v6e zz3@ISD+2BD=(nb0w|7_1#FyE#qk+r{8e}>lQ#hB$F!Mtny6S0J`LqJxesRHoD~1cO z$<8gkE@U}uq^?WYiLSS&NuO+GI8CSV@)PbE9JOxR&%=f{zRPb+tlR2;RLYrG#^2xfG4V$~)zPF#ewXD@MDc~&U5Sc2QpeW@Nx4jlX~ zwrkBho7xi`d1v>DDN=7uUH&*tjR*~oXZF~tV?URao><5VXG~0E;Y~W}RW$DTltpWj zYhi=Qw4cVcx6z(`oBAJLXT`l7{vrxpIjqD+Tehv`9fNe{1CzlLoxEYJw~9FX%QVQj z+>NcHZFj8rU^FL&+Wp-b7Oka~w8!iiS%V%NGm{?i<3@1)4S|l+WG?vj8k1yX{p8q< z#e%b`=e@-!Z>-uvWGb$h9h&#U*lSwj6#Yn|Sz4}#$s^_VXAg#xo;}*~ki?5#1hE_# zS;FS2N$?_*G@VW}uztyk+Q=0BN=0*bSr>Zo#hOhX56TO>@s`VG_Wm6@zT&_MGtlmX zh=wE@(G|uqN6zxxZ49Ztv#Q&1lLw$Pj7G)0!YZ}}>bhQC7i@L!S2RCxaIO6qADwuI z`fM4t3s}c~tm9kKr!mnDyp0ghoT7e2X1@|Lad6i#`sBtVm_&4frQZY5qsT{VKorKg z(s25=DA%4d$~nk=#p@bqEYR<921Bfa!M<2!oyd6HejfFw4!!Yj+1Gv6mS+4cz`}>L zB<2fadKQ=H1anoZH+FrPOQ|V@P9)fiE2rsNO#AR|<2WOcw~0I~-1EaVCVew2>Kv1B zj~)N%7T3h6Xvvf7Vd(uEnA{?;^r4g$$gIJa1oUDmkvH($&)jIQl#6na)_9xQ9>d%I z9<;n7QzA~P8D>YEzCH|T9T@(!Cp?Sd6?T|H+OxQvTdy&=SIKNEyP83#!B}=;BAtkg zy%HVI`+;8SzM4WEms!KxlQ3Qw;7&7_F5a}qW+n?;iy{*Srid2o)zS#3{2NXmUuG~E zV!3`!u`6IJ>i$Et#a{7YEsKB8DgR1s&)DBh2O9T5Z;%%q7QV$i%h~~_q>B@=5Qg5c zC;_dQ;*7H7_rZLN1$ZOA5baK`0r53CG!aL$0;kth_<^~<4L=)XG!TOMWKO~?y6Z9F zQ)Pvg=N{=LykFSM;W?%lQggb*q|jh$bA_Je!kTw{3%@W2jM_0fu^<_%@!f`|-LASP zhdg5%7`t+IK(N7pEp~)}Vm|W*<_JML;aWk)J!`RbYVzOZG5_-qk5Qd)y+FdpR;voN zPF1F=Q%Yr}>SCF2!(h1#IDh!1*tytp!oFF|$L3#igkU~4i}}&(L2IYOjPxKm&`tXC8kYLzMNz?7%YCKAYPC$ORceJqu|nd;eT!seWiqL*SfkVySClG*2dT-~ z1sVVG-;4kI*X&xkN?ofJ9_9;47Q>}dRZ%%#eei|MqkJJb)Y8Z|EozxgUaZszk1bg_ zzhHTywD@1CR-E0aoO}CJY;AGH5+|jmOe_2*cK*M}Raa}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt literal 0 HcmV?d00001 diff --git a/gensim/test/test_data/ldamodel_python_3_5.state b/gensim/test/test_data/ldamodel_python_3_5.state new file mode 100644 index 0000000000000000000000000000000000000000..7f84844f64ebf325f01e5a5e3fcd49d1b3de8696 GIT binary patch literal 737 zcmX|9T~8B16y3Ja!dOKSP(%U0i^L|%iw`8*1WnK-m=w@WuxYlt3vFz-y`3$l4Svhsl|H&zv)vduH7Fq-pU)Dws+eW|9Y!hUOqP zWN0I{W}6J8o>_2BgFB#lB}w93R5^HbE0@aTgL*n^22;7DozSvbEzhD^Q%_sm&gOKR zL0y%_ETh_mdPSN8uO8Kfo6ebORR9B@>Jrj1XmBi&nw_w2Kw|+|7>*RdABLj=#ih6v ze*uowBn>;C5jUGux41cR3yv$2;wr+4Ff<1gkE7QQCx?fJ@BJ0a*%Zl!me{knPwf(R z?Tvj@q!l{EIT`o+vD8~XibWX@Tl}gcoy8sefuFqiT99zL!!!1nzW72kSBLAt0owP4@Fy)i8lhF% zr9U$5;xY^}S`-+IRKkx`en>AXoj0_3mu5oFVZ7}&ZfFjR3WN81A9T{5jKAnT{iXvd z({@v60{6sg*K4FTDhqddPFuoo1Ltv@zS1%+31LPHG*1U(J=5~&6WYh`_#w|_+8MZs zYq;GFGPK6U0NTQE%7)W1wL$!HH0sQpLwf|ysLZJ(CM}TwgF{CI&Z@0WO++A<<%uaX z%C#vonbt)6Tm(8*pJ)pSBdv4jQW|TbTu)^39J(WLUiAouX=`vHUKe+=axpFfa48Iz ks{wEo;7Yu?GQwuTtS)*d{|9ij^6EesdTh8B%f*8K0AxZT)&Kwi literal 0 HcmV?d00001 diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 4be73428a1..538f470453 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -367,18 +367,18 @@ def testPersistence(self): tstvec = [] self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - # Method used to save LDA models in Python 2.7 and 3.5 environments. + # # Method used to save LDA models in Python 2.7 and 3.5 environments. # def testSaveModelsForPythonVersion(self): - # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python_2_7') + # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python_3_5') # corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) # model = ldamodel.LdaModel(corpus, id2word=dictionary, num_topics=2, passes=100, random_state = 1000007) # model.save(fname) # logging.warning("LDA Model saved") def testModelCompatibilityWithPythonVersions(self): - fname_model_2_7 = os.path.join(os.path.dirname(__file__), 'ldamodel_python_2_7') + fname_model_2_7 = datapath('ldamodel_python_2_7') model_2_7 = self.class_.load(fname_model_2_7) - fname_model_3_5 = os.path.join(os.path.dirname(__file__), 'ldamodel_python_3_5') + fname_model_3_5 = datapath('ldamodel_python_3_5') model_3_5 = self.class_.load(fname_model_3_5) self.assertEqual(model_2_7.num_topics, model_3_5.num_topics) self.assertTrue(numpy.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) diff --git a/gensim/utils.py b/gensim/utils.py index dfca56f5f3..1e70295a08 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -909,7 +909,7 @@ def unpickle(fname): """Load pickled object from `fname`""" with smart_open(fname, 'rb') as f: # Because of loading from S3 load can't be used (missing readline in smart_open) - if sys.version_info > (3,0): + if sys.version_info > (3, 0): return _pickle.load(f, encoding='latin1') else: return _pickle.loads(f.read()) From 99cd0801b7cb906546472b37c02d354d83a7aaab Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Wed, 7 Dec 2016 23:09:32 +0530 Subject: [PATCH 07/15] Removed old LDA model files --- gensim/test/ldamodel_python_2_7 | Bin 3267 -> 0 bytes gensim/test/ldamodel_python_2_7.eta.npy | Bin 96 -> 0 bytes gensim/test/ldamodel_python_2_7.expElogbeta.npy | Bin 272 -> 0 bytes gensim/test/ldamodel_python_2_7.json | 1 - gensim/test/ldamodel_python_2_7.state | Bin 510 -> 0 bytes gensim/test/ldamodel_python_3_5 | Bin 4700 -> 0 bytes gensim/test/ldamodel_python_3_5.eta.npy | Bin 96 -> 0 bytes gensim/test/ldamodel_python_3_5.expElogbeta.npy | Bin 272 -> 0 bytes gensim/test/ldamodel_python_3_5.json | 1 - gensim/test/ldamodel_python_3_5.state | Bin 737 -> 0 bytes 10 files changed, 2 deletions(-) delete mode 100644 gensim/test/ldamodel_python_2_7 delete mode 100644 gensim/test/ldamodel_python_2_7.eta.npy delete mode 100644 gensim/test/ldamodel_python_2_7.expElogbeta.npy delete mode 100644 gensim/test/ldamodel_python_2_7.json delete mode 100644 gensim/test/ldamodel_python_2_7.state delete mode 100644 gensim/test/ldamodel_python_3_5 delete mode 100644 gensim/test/ldamodel_python_3_5.eta.npy delete mode 100644 gensim/test/ldamodel_python_3_5.expElogbeta.npy delete mode 100644 gensim/test/ldamodel_python_3_5.json delete mode 100644 gensim/test/ldamodel_python_3_5.state diff --git a/gensim/test/ldamodel_python_2_7 b/gensim/test/ldamodel_python_2_7 deleted file mode 100644 index f2ee3d6f09b20936db0484970bc49c348d3f92a6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3267 zcmZ{nc|6oz`^SwM%hAuIF zL}f{CiWV8P;Fb`#y(DFcdZy*~d;WT^KhAZ|b$!n3I@kH*yxu2Km>5c?GQmirNE(Su zVH#0L_#ZQb3kmP~J%oT@dM6>_1ZW!yP{iRFWFniv1f$3}CO(Qxq2WnPXnPPKLJJ9D zl35Vk!FEaLhXDwXMP}exAdSj|M6f1+Oej7w5|3krG04m?8ifRjI@o#;T>C$iZvrB8 zJd;UgLSk4FAVFnE;@ET&o+Su~W5ogaNRSFf3W9V7jesYB6p+P%5DvDFeg+XZoac|M*Y_lj$f7Y2kc^%uB#Qz>UA>JD9yG;3a=*3^X$-PaB%8tl z@eBr@gZO?!f|gkfHj#yZcKlQ!sHC4+NFFPc0>B7qo{$0x3v;Clr~FcoSR6VT0Vx8) zY&2E~3lk`mP}pq%%-YS(&H0W^ysty7J0r9d9wE|R zHc;cg5*_elrczI9C+O|36rZ&J0CPnDiG|2H#rR9wcMpr&$_|k^0C1~VZs@sPSo?>K-D_3JAP8~7fni$n{84eXw6JOq|RP}uxIQUoMgbJqa zw}aI6Px0qKsoA-Wcfj1yd*f-=QT$HQ%h*mtP_zN@40YyiYthZhqRDLvy7A8nj~RU} zIjxbc>G=iAOVCk4)9E_WS1xm}=Jl)uijTY!y>EUv>!J3Z(v5*8s$tZd2J*9k)K3l? z#aZp)vhd5ux$p#ri0Wp6j*v5Ha4A;J=uLN(h8AnT*I--qMYlJ56Vd{6X#e!s zuBa=A)FxTHQqpt=PnfHy8*@hx(70!p(L zhvj{DIl3nMWoxdRJt_bfz~0HIM~|zrd>J!B1=TONb3VsPK2;gfT}E|QT}B=))qyAM zw>`OLepqTGsAwVI)NQ-m3FPv`P(KcM;m>b&=@WM~@XB_RbZS)pf?BU=@Ib!J(4fRE zg|-~KUpsRK%F8&E_d57CZu@lT(xbR)`4@NnmSTpW0V^@zO!#eTLfm zwCOH>@AN*nGdp?4Xv5!aWUK}WT2IRR`ddjaFN)CjMRV_R!GJE@U&E)ud=rd?kD0XY zqa@#{(Ly02l@!)E1=}9f-N~SamtK3<;axte7IC^anmh#L)s-j2MrWnh8LgsY8;9`8 z>eTLef=$~5X4@?5Z^Jo*+WUs~@xQ+eQoSHHm36v7oK%e$jhQfi8}xopDQc+;y*8T6 zyKb{lda1C*rTm`fh-UkI_H3x;)t3lu5 zNt8amL_X(Jd)%pMdt$`MMTTK`?q|KK1GwHPOR{FLv6SfNI7Hi{O|y%Y6${s%NnN2l zQ*?hxgx`&AlDuAIHg7Oe7LppcAO-uF5fzyikKL7Kl8wmkZbhoqdW{pM_Y@!ZNbbTa z-Li3ULG3TEiw~Hu@9%uxP__QI)Fs(y%eobr4yQ3$e9BVUL&WZ>ObvXcFHX!yNzw}CR~~;3jWPc)Q0kNvI98j+fU7>UNJ;h)uNJfFL=eVRBOfK~kswHh zdwEEfJ!~Ik0T>xK&HEqH12-hBWTdSRd#d-(y6N1u&oL=HidZ4?f9HL>QazhlBJ+9= zt9|=&6~IY;ZdEL?L)%1I@zOEITX&xPLZ3BhuiJ@BIL>q1IXUFNDZHh*OJk08#qQ6m>;HghJw?}ds68<+7s~8;@9-oYTa?x5!rT@R=x&gX z$$@Rk_$vt>!Su|81=4f$tqJ{wS8e>$ar5M(w9C7ta-}a4Za%00LN*hh$~Bn8iH;&Y z$ogYWX|H8n2Yc26>r_%!KvSQ5cMiWD=Dkt|pO0Re3Rv8419Hc;r)PB9vTB4;X4ueL zyJKzaS-xRe`Wd;n3bCR3SG6i0-4^1|`S7Yv*i-d32LqHorsHF5Q3Y9%=dOR{3eU%- z6C6=IT0GkPAvb4)xi8&gU*F-KDkD4eosCa>7UaQ~I{Z_v2EVS@^4WNZ_*(hAq!Asv zB3q3t^fc{Om%WopVXvkJ8pUL{T>G+GrxUab+H}H8T?R@90$U$kLZ7B47Z(n9>EgkG zPg81_fcII`HgD(SO6BeM#miqvE~=OJ#UPxuxOufx?U3>W&gGly4LPB%2@@}H@<2gv z3T$IUD^PEjU*old`!^NDHZwk386?;YvO>%{u3bnQIOoW3kRfVh=?!7?YHmSKcSgtR z2$}CqvJ#W#oWPh^?&rg&c{lE$#r-;+9{2khKuPxWLknsV6S=j8M|K@5MQm04rkPC& zO4TqfQY~)t3-uOF&&~1n;%5wNsvUMs%bY5Yf?2+}nz5vcC9<<9)TC^WV1Ek z;(qIOd8$eyGTEnHcgy#-8Fh?v1^KeY)v`V5p1xwRc2m{Ro>+@CDA4GB)|x|e|O*S&7L_V!F%F)~W%l%u>$#KLXQ zbKKD~mZc3!r%O5+n*;0T?H(Kw(U{B3i~d7+Oh!r8R@aU{tk6&+oC|MY5nRn-SeSz? zpJ=+VBwxrP00?pvo`NGskr^CF8H)hKiD7JN1XBR6kcul`6cGQ8#saF2WI{-+B!Cbs zqiKu?GJ^@JVqpOMC*7)r?FjoXW~vJ?6Dyn&CIHJ2i(g==@xQ}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt diff --git a/gensim/test/ldamodel_python_2_7.json b/gensim/test/ldamodel_python_2_7.json deleted file mode 100644 index 5ff1321c8e..0000000000 --- a/gensim/test/ldamodel_python_2_7.json +++ /dev/null @@ -1 +0,0 @@ -{"0": "interface", "1": "computer", "2": "human", "3": "response", "4": "time", "5": "survey", "6": "system", "7": "user", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/ldamodel_python_2_7.state b/gensim/test/ldamodel_python_2_7.state deleted file mode 100644 index fcc60724d1d10eed0b41b3810f7dc98d8d63c4aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 510 zcmZo*N={GBE6&W-%gs+o%_-K)Nl65=xO`F)gG&-iQn?BlH5&_=Y73b)LOJ5&^Gb6I zDvJwQVnapaAKz|YaOq2|?)g^xInfcXt`vN*=W&%|WaDbKzv`ng z10?=jn(IKCajSjf+TAPJCO6y9lx7ai{>bidlx2yY^EY;fJ!fNr@4x$R|88cX2iKMV z_U%_!d=0<&-~QTj&jSuXI$F$SX(CYkn1{c1nB05&y$}EW{Q8F9A#MHnB_Q#~U+rg} zdByIqv}4Vun>YU3bDcf$GgPzIKBWBH_C=M&_HUhy7Ozn#v@b~tWlk+gEac7L$UuY% vA0$i|+cJ1MLj@SX;DJ4i2I^r426{?!95h$8Cy3SwwdG%6TF5+KsDlhEZolt}MEn$!SF3B5NN zW3E4N#{C8N{)2PB-f_n`X93>x<>mwI{p_c#XU)0heD(ju*@Mk!oO z`MdR5Q)zjLRHxLGXa!5Ly+p*FxTUg8Q>4^r1uL<+#EP5LGObpr6|BXI|LY;um8n&7 ztzaXzkl1sNGPO=s!V^oCI++*OGMkIRn{+{9*L^A^E9L+}U`UpSlX zvEZ4Sn)>RW{5$(2Hq!}Sd2NM`lFK~i{6|yq|Gp5s3wcGK48d0?_~o4iQkI{uRqBLm ziShq9?eHxowM^q-8|14Z&y|=+)9l9 z&)+khVxnS*c(0r<;kG#Z4Bk`V%GC;)j)CzP+nxcVtdJE;l@&@&wGbc{No;teRC<=5CofcLgnMV`Yjqr&a9?>w4~?u;QC1>7 zZwvd)i1S8~RGM-2H1l+TRIV%2h=lN<3?U*=V$A`iWQ9F^7!fT*7TOh_ks_*)k$#XN zL^IMUYKA(d@EqOpNa5i*y0IC;BSyFI|Ks}j3|9lEU-y9a6`Ayn7NTn_RWs@ExW^oi!B&||g=hvu|}9ed~dZ#-~J?Md8kk)~)F4d~9G zF|;Xs>7W*~w1*!syj4-XzZ~o6GNZN6MOM_nPYhUjPg|8AsDtLQ6y$eZmG%DW7c}0x zucfhI>WHyIuj?0#Uc>%V>lYZrcQ-PX%VPx$xY9n>u$x>#GaU3!iy}J2i5RPSJ{XMk zPAH&ZG-=RA({5OfwnNjia7>1WViS{^L>@3lMmnTbcbKS;cF6FM8c|P1_h_vD`LLAs zY5NO$RSuk)MLM9BY+8JdUo?%D#}%Ajy@Cd7NH5B*43VR&l!M>LF~+cgC5+MRg)0GH zs2d}%L+KYL(CLG2-wZkwW4_McJyNaqcf=IFV-nLaP#10G*wPpcSyTOkr|6*OYqwzJ zMGZJ8y;DJx*j7?E-!;>Aq7TNoN5f}lOMLgB#m{vbD^EJYu)o^G+bNoxGq3efysQdyM_AjUX3BNlwI&Rf=21*hqsL2o(RkNF6glTi0v>3YVdmI zMPD&e7l>7?iEv0wFUm2Tz;s!`_jpWP=l$=}24`!Ww#h)pkNCC)2h@in%U7{jsk@9; zj%mRYoI{Mlkax^u8hrMQ=43^5f)0Gs z($MRGE3z;w(3S*aDM{HdqT7{5T_iVNra=FWA*KB&YQwOKOe*T3*$Qf-c9_s1(a^jZ zhPiSEFILk|jXia`(^vp)1acvn7`W#!`dilBi$??PJ$ULsW4X*469ywvFJAiO!HF?) zYWk(w^zA+M;xL)|Y*Ruq(Vc3agdJ}>3}o&vVmsY~x!>kfTuW;OIC@R>SY;-52{54T z2{UW_BX*@1>1P40QClsR6VOM8<-wS&q-KU;6AR{;$l;8vQ=b?8l3|?I9){h$bM5Xm zb@Dq}38M8hEr&cG7J28dIHyN6oaaX!oRL22$1#@xhOE?gE1Adpm{fhB7AZ9{v`vMQ zx-g5^;W)lq%u2C`6~91H$<-^gSB>Ed%&m29Yb*WC37w%8YRcjnC%lT#Nj;TH8p-DH z*RfDnQ^`2AVm*|0BWtijBQof{OR1HX^5Sqp#^?*Qprh}1XfcHMU4JLx%U?8@>xpkR zG{+-5v2Clsvdp@Qj1OogmsYu|0yz^ck+eq>9N0YO;+Q2yjA8^w$ymTImMvdupe1zE zLO}*L9B2qfk!3cd_oWFdGzD;dA9^KV=WnbBcczad~1~lEIiR3y~!!8VSJ|?Pau@cEs z&)cyKv}06>-nUqNlZ1nds4rq+9n#$SjDFe)g6Y1LnKu`!^!*s-U`;!uTGv3TIOTx6FPo{h6w8#90fD6mZ|9pjk<|l zFv)tR$7CG!WZbO6oQj%oR29Vr_bApG(@``ViQ~UjCsg5t&29d^8TJ#Y|E0HMyk{ow ztI2`G+T`k)ji7lN$NUFs7TFZjbi9gID{?X*)+^?=q{YK%!}%%LgPl@^VqCw0fLrRv|>V%g>q+d>{q zdzMgJ-7UL3`_Gx__Ka;a(`rJO9qp9rOZBvl)*L#pd*tN!_-TeWr*9$EAG=(i;TU-? zqAfY~{>Acj!ZyL?jCM>o(fBPLHC&_(n6j%Ghk-{Jx4V2T+|3oEuQfE2hItoaGkdJK zo%#u*(rVU-dal+vhG~iao6Ko|rNe=3q*shSjQFA+E9!Et#nH!Jm<@8r02jtFE!q}f z?mZ4%F!Mf;dXsP{W8eFR#wZ$g%Y7$e3jc}-vfy3lvuMkKHsa8Fzvv1pa4(1T6B8ah z$pY8ku~pGWd)9-YM>Is835>@w2E&s7sN?!PAX7Sq71Wbkp3doM<3BMG$Vzza-LpNbXPz|nn08*TsLXnC7PZnLeg^6haJEq|l~2PwN3EAGAre)4f= zAlikF0xhfA1054+?IHssw|zd!aV>T6;bQKx7j-SgSzPBn(DJ=F z6=!Yw8lMN}{kb|juwoGyB`(0~i`O_QKPAc?w%}HDU8=>R4JUt-<63o3r`8x-KG}U{ ziPW1@r#DU#Ljr@NSv@vuc%O?(SInmcb4^TP{%tbpRW#=MoK0(jxv+O>d=k>}Of3pX-(fG*5vHDYVce7le>zMXE&a}D57I5{^^TuMJ}3s!Z5~6!^yjXENe!TG04i~ zb=zpv*XwBtmsmR&`$C0zEZ5`K%do$6=!t&Ed);GiX~eHQ%zsRZW4$mWr!hsxS*x1e zvE#v7N(}*YEWvJ6DNR*l%7afEM=7yk?G-Z-87RQxbW`_)4 zF9x+XT>i8xya?kJwpl`2)0oc9*SNS>$Yd$InnEZ3Sh8aw9S@DX5+2Rxfgb9*nn3NB z*~44oFqZG*Ow*Sx-m=Cx==ua(<(6e2b^NDZ%CTqZm6{`~)lA#hmENIH)>|6LyOKmTwU(FxD;e!tLb3J#t@!t#od-f`R{>|X`SAqnwd3j#w FzW|A_4}Smv diff --git a/gensim/test/ldamodel_python_3_5.eta.npy b/gensim/test/ldamodel_python_3_5.eta.npy deleted file mode 100644 index a5ceb80b2d236cf16d807ef7e63979bed00ebf4b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 96 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= dXCxM+0{I$7ItqrGItsN4aKOa?1`q6EGyo3%7kmH! diff --git a/gensim/test/ldamodel_python_3_5.expElogbeta.npy b/gensim/test/ldamodel_python_3_5.expElogbeta.npy deleted file mode 100644 index 1971e44b14889570af360270f96a8f047331e0b4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 272 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7ItqqHnmP)#3NXMmW%;eE=KEUgrEcx$xqGg~{}NPXIw`ZK z#s2l3gVrJ|Hre;DJk$McW{drmq77$2;_sO3e$3j^V*j+zU$t*Ri@luOQuP%vr|fUM zSi$x&@sxe-q`avLYY*D56Z-V2$9{|b#5nEnd$wEb(`Sj^JrAVid)E{m28vsiNj1yF zF0r3wb!m2v#u0m^kI7(h8*#nAuFm`HcPG!X?RDB>zjndYXE$@p?JMrryjZMRZm+2k KtgI?oZVv#e)^tGt diff --git a/gensim/test/ldamodel_python_3_5.json b/gensim/test/ldamodel_python_3_5.json deleted file mode 100644 index f64397704b..0000000000 --- a/gensim/test/ldamodel_python_3_5.json +++ /dev/null @@ -1 +0,0 @@ -{"0": "interface", "1": "human", "2": "computer", "3": "response", "4": "system", "5": "user", "6": "time", "7": "survey", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/ldamodel_python_3_5.state b/gensim/test/ldamodel_python_3_5.state deleted file mode 100644 index 8f995aeab80414d530ba493550d5396c9879f6a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 737 zcmX|9TTc@~6yCPb!dOKSP(%UmMPd`>#RrmYf+pw^ObX~G*fiVSg*LX^p3WB22NH;( z#YAFMY9N@P0znBEH5LQjl5>8TPPdKoF!Rlv@7%sKGj4s-w0I&FOr;Go$%9Ela}XOc zv=LjgO$JiWEV!n@9Z)@zB$>7b9^J~NGWnpM&YHngE@>yUY*x#&Xx7xz7PqrG-DXf% zWiiXBTA^N%=D@2*b&*Qv%(N))Mm*Q4j z1vpjgRfI})6ai@q*|;bOHfe*rGVn=6%d2_f1W`>04Obck~@ z?)PJHN<#r$suF0k zgVD@HUxho1JNN@XdGWO%;c|y(>@j`ug=nr0*MkGJ?+f8iT6i=e)&Kwi From 2ecde2cd7dfbfd2043cdffd44540951a83a925a5 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Wed, 7 Dec 2016 23:27:39 +0530 Subject: [PATCH 08/15] Fixed numpy as np in test_ldamodel.py --- gensim/test/test_ldamodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index a7c531d333..6f375e36e3 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -422,9 +422,9 @@ def testModelCompatibilityWithPythonVersions(self): fname_model_3_5 = datapath('ldamodel_python_3_5') model_3_5 = self.class_.load(fname_model_3_5) self.assertEqual(model_2_7.num_topics, model_3_5.num_topics) - self.assertTrue(numpy.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) + self.assertTrue(np.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) tstvec = [] - self.assertTrue(numpy.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector def testPersistenceIgnore(self): fname = testfile() From 237eff4615df0466e5037bda974347df6f61da00 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Thu, 8 Dec 2016 02:47:31 +0530 Subject: [PATCH 09/15] Recreated lda model files in python 3.5 --- gensim/test/test_data/ldamodel_python_2_7 | Bin 3267 -> 3439 bytes .../test/test_data/ldamodel_python_2_7.state | Bin 510 -> 588 bytes gensim/test/test_data/ldamodel_python_3_5 | Bin 4700 -> 4912 bytes gensim/test/test_data/ldamodel_python_3_5.bin | 2 +- .../test/test_data/ldamodel_python_3_5.state | Bin 737 -> 825 bytes gensim/test/test_ldamodel.py | 2 +- 6 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/test/test_data/ldamodel_python_2_7 b/gensim/test/test_data/ldamodel_python_2_7 index f2ee3d6f09b20936db0484970bc49c348d3f92a6..38865be9495bd387217fac5f2f9e713d569ad591 100644 GIT binary patch delta 702 zcmb`FKW`H;7{+sfG|A=K5K?Gb{uEjYDbPw3B&d+cgfg50sKNt7WPE2QXEk@{*Y*`s zhAc!V8&!4?BMS>3fiHmg1mzO|dk369RmF}M%l51H{XNg)Z;QW{-fxzRf=T=;mK5t) za9_dc!NEr5MW%`DC<)EOE|UsoTnpJ;Gf6a$BL!J^3l-ZW3<=hOWGaX|6ms?2Gq3UE z&*vjFBZyK=LEfdPVEW)hP@>I+eekLBv{0|T-TzttGXDl;DDz1doO(_C=PShO6kn{4wR}W+jxFwd;9KvI6Hi3 ztvxFHX1M6u5hYTRt_|~Us*f!1-42|qx>iG^`_l@lyMozp9;K6eZpyVx#lqJQ)=|D! zacz|MgCyEjh5{BF-?OL`@`#56Kz4m)5 zGBeR~DJTagM3)Vbo9+jOq4i+Ouv{H}v=)X%XSH`_fN~fspNp=7tIg42EKb2n6IM}y zy%vuz%??C?>N&nH;rtJc{_?FqYAD!;YJl@CjM2CZ7df=`vH@%&X6++3$r~Y wyKvJ)pXGF`H~J)S>j^qF8gTm`@{JL>(+`#h=;cJ=VeNnKv&Rj!mw20h0ig}otpET3 delta 530 zcmYL`y>1gh5XXH6f1QtnI047uJHg;kL?R$T8Z3~Ii^T{9@|f=A#55IqG?0Q4zX+mV=NG=Gie_n(>H8{ZnAc8;IbkIwg?nD+ac8!U9% ze{%BxR~W9S5ow~aNOoa)K;xMDCQ@8S>5yTm(|#3nfBgM?3Pnk^<{Ha{K{Xl0{zx)v ztWY5pSdB#@Vk=6O1~d>uVKQ_(?GJ~)I$ze_L%G0sNHeS+|ExE?2`r`3h*%tQpAKb2 z@xlZ=MH80BzBYC$E=?QG=F0N+K3;4=sr&l=-rj>pxH5g=G@sN$i_K9U4dQ`o%eSdI;7a2LaiFlUdL^nw|AAL6kmSdaMf4w^A+AkNob~BbZ|ms#1j@dz zLm@MbH+r*TZe8Qe9^Ps}mA{kEhv^{ThTxYAv4nb4A&FyabZ9K6L-eEx>33~Srgc1f^Zy9UF1FGs=7KjLMvHvj+t diff --git a/gensim/test/test_data/ldamodel_python_2_7.state b/gensim/test/test_data/ldamodel_python_2_7.state index fcc60724d1d10eed0b41b3810f7dc98d8d63c4aa..424cf0096d575be4c284091eb699b87b08fb2927 100644 GIT binary patch delta 99 hcmeyze1>JiF~)G7)(oD`&;$lBcwkR0dh&TjeE|FX5~~0J delta 21 ccmX@Z@{f7LF-A@%Z^pI^p3YE#$+ArP08*X?X#fBK diff --git a/gensim/test/test_data/ldamodel_python_3_5 b/gensim/test/test_data/ldamodel_python_3_5 index 380a6956863aa53333c25c2072d33d859400073b..bf5297e9e8b92745a52f426cddb0dffd0e4e58f2 100644 GIT binary patch delta 1026 zcmb_bOHUI~6rKTjv>*=wMe&I|iU>+U9*R~)ta5E76Bl<=C-b;46WW$7=G#OJA#d3jVj(ESJ=+LFqZEe*qO3!srCYwu zEFNcG^bv;`RWZzjIIYKb-Vm-g22HP^S%ae+(4s+Wf~cf<9oh~O+M*O-Tsy5n|D|a- zMiioMK!*mM2~sPw%|cgqHk)1ei6$RJ5pdkBI5C>v+HLD9Va&T5gvQ;CmvsD(B z;2eq5W^}3s9f0SFO6w3rLX8q!NTni)V_Q3I-E%|HP?t0q7I4|59hj@&dTs#+3qs#COgHa} z5{#r%FW0`NcRF7j&W+_(Sjb_NsGB1_! z%#*R1nei!@I~3|(B-FeH_fe=!fJ&vE>NON6;6e2o9%`_F*Wh&ha|R?jgAcFO?Uvq0 fk*XODvH~8PnMswX>C~Pz{(Zk=Tfm}OG)I2{j_x&& delta 855 zcmZWn%Wl&^6m|Nbts5F>Q%ZRiN?XD!&~mbF-n9i7T!d|*q+u_6WiA_4x}uy zp;XyXb=4hPHVBDN;0G$!EZ`5y79qjSxUgXs5x0i6}-GQ07&!)?}p?qcB^UT~Q@Zl2^3lV0EpI6`L3euNxP$jGx|$sOov znQ4R~<~u=+3dtmgUfYjrVI!Y_I;95lcYC`s*}lsL=sB|o38iP#OG~p0(7*G1vYAc@ zy6>ZW%^Sm&@fRzj3LJXcEp?EjRNi?%Ep| zJL}>L=L{H=aNa7-r%0+;*tchY9t`bCxM1z9WSiqHRXhY} zi-m(EPW2AkKxGhmwt#8Up&Av0(yMu!oN~!vrlrDoomDBX^QZx{MBi<8j5gfxF5jrQW(9L>wpf zUIhv{1>z`%OxiAwU`6#HnZWy8B^!qh&f{Tf4@gSoqKd+!nt+ES{kKdnf@p(B0v=%y bj_0`MIWvekuv%6LwMmJZfXAdSw(@@f)C&ag diff --git a/gensim/test/test_data/ldamodel_python_3_5.bin b/gensim/test/test_data/ldamodel_python_3_5.bin index 70838d6d99..a31dbd6199 100644 --- a/gensim/test/test_data/ldamodel_python_3_5.bin +++ b/gensim/test/test_data/ldamodel_python_3_5.bin @@ -1 +1 @@ -{"0": "interface", "1": "computer", "2": "human", "3": "time", "4": "response", "5": "system", "6": "survey", "7": "user", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file +{"0": "interface", "1": "human", "2": "computer", "3": "system", "4": "user", "5": "survey", "6": "time", "7": "response", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/test_data/ldamodel_python_3_5.state b/gensim/test/test_data/ldamodel_python_3_5.state index 7f84844f64ebf325f01e5a5e3fcd49d1b3de8696..2ead06d02973f21271443993194d36cd54c01bea 100644 GIT binary patch delta 295 zcmaFJx|3~!9y>Dw14C*_;zWy5CZ5)b8{O)2fC3P3c!50~7=@x4oNa|-L51Qag%U{- zmh3<~^Gb75@{@}TCA~Q#*ns@v;*!LY;zFqmmJC*JhSoyq4DR+qnV>>h4R1zoCU2g$ zLb(i~jh{6bUFAWBDFhWNmJ})_MQ{Q2#>Z!-=j9irrW6+{#}=wYaDc^%lQRn{iwjj_ c3)P?^Ks!Mq>am3yll_>a*)?MewMvup0K(o#%K!iX delta 233 zcmdnV_Ks4Om&j4hOk-~x+ersw4srKS`YO2-z;M2LdLic*tHi;6SLQsavg%Tjal z6M@pQv4wIG>_BOt5h?k}#f9?ToKQoHlQThvD#R8lMlb_aq?RNWDrLB{WUzWOv=%C7 zaJLt#1Qn`kcr$u4c{8>Zs$~duX0S&H0hL0);RW_^Mxi>$*&0EGnk9u=Nu^180Bs3D AzyJUM diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 6f375e36e3..6dda042596 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -410,7 +410,7 @@ def testPersistence(self): # # Method used to save LDA models in Python 2.7 and 3.5 environments. # def testSaveModelsForPythonVersion(self): - # fname = os.path.join(os.path.dirname(__file__), 'ldamodel_python_3_5') + # fname = datapath('ldamodel_python_3_5') # corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) # model = ldamodel.LdaModel(corpus, id2word=dictionary, num_topics=2, passes=100, random_state = 1000007) # model.save(fname) From 35f2dcc850bb5c13c3160e1690c006ebeef101ea Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Fri, 9 Dec 2016 04:01:49 +0530 Subject: [PATCH 10/15] Added id2word in 'Separately' and created lda models again --- gensim/models/ldamodel.py | 35 ++---------------- gensim/test/test_data/ldamodel_python_2_7 | Bin 3439 -> 3438 bytes gensim/test/test_data/ldamodel_python_2_7.bin | 1 - .../test_data/ldamodel_python_2_7.eta.npy | Bin 96 -> 0 bytes gensim/test/test_data/ldamodel_python_3_5 | Bin 4912 -> 4911 bytes gensim/test/test_data/ldamodel_python_3_5.bin | 1 - .../test_data/ldamodel_python_3_5.eta.npy | Bin 96 -> 0 bytes .../test/test_data/ldamodel_python_3_5.state | Bin 825 -> 825 bytes gensim/test/test_ldamodel.py | 2 +- 9 files changed, 5 insertions(+), 34 deletions(-) delete mode 100644 gensim/test/test_data/ldamodel_python_2_7.bin delete mode 100644 gensim/test/test_data/ldamodel_python_2_7.eta.npy delete mode 100644 gensim/test/test_data/ldamodel_python_3_5.bin delete mode 100644 gensim/test/test_data/ldamodel_python_3_5.eta.npy diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 781d6e9d81..2b4297a5ca 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -996,7 +996,7 @@ def __getitem__(self, bow, eps=None): """ return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics) - def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, **kwargs): + def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, **kwargs): """ Save the model to file. @@ -1036,9 +1036,9 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, else: ignore = ['state', 'dispatcher'] - # make sure 'expElogbeta' and 'sstats' are ignored from the pickled object, even if + # make sure 'expElogbeta', 'sstats' and 'id2word' are ignored from the pickled object, even if # someone sets the separately list themselves. - separately_explicit = ['expElogbeta', 'sstats'] + separately_explicit = ['expElogbeta', 'sstats', 'id2word'] # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some # array manually. if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or len(self.alpha.shape) != 1: @@ -1054,23 +1054,8 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately = None, *args, else: separately = separately_explicit - # id2word needs to saved separately. - # If id2word is not already in ignore, then saving it separately in json. - id2word = None - if self.id2word is not None and 'id2word' not in ignore: - id2word = dict((k,v) for k, v in self.id2word.iteritems()) - self.id2word = None # remove the dictionary from model super(LdaModel, self).save(fname, ignore=ignore, separately = separately, *args, **kwargs) - self.id2word = id2word # restore the dictionary. - - # Save the dictionary separately in json. - id2word_fname = utils.smart_extension(fname, '.bin') - try: - with utils.smart_open(id2word_fname, 'w', encoding='utf-8') as fout: - json.dump(id2word, fout) - except Exception as e: - logging.warning("failed to save id2words dictionary in %s: %s", id2word_fname, e) - + @classmethod def load(cls, fname, *args, **kwargs): """ @@ -1083,18 +1068,6 @@ def load(cls, fname, *args, **kwargs): """ kwargs['mmap'] = kwargs.get('mmap', None) result = super(LdaModel, cls).load(fname, *args, **kwargs) - # Load the separately stored id2word dictionary saved in json. - id2word_fname = utils.smart_extension(fname, '.bin') - try: - with utils.smart_open(id2word_fname, 'r') as fin: - id2word = json.load(fin) - if id2word is not None: - result.id2word = utils.FakeDict(id2word) - else: - result.id2word = None - except Exception as e: - logging.warning("failed to load id2words from %s: %s", id2word_fname, e) - state_fname = utils.smart_extension(fname, '.state') try: result.state = super(LdaModel, cls).load(state_fname, *args, **kwargs) diff --git a/gensim/test/test_data/ldamodel_python_2_7 b/gensim/test/test_data/ldamodel_python_2_7 index 38865be9495bd387217fac5f2f9e713d569ad591..f8ee3514e143bd2caf01a455ac1267219945c4eb 100644 GIT binary patch delta 64 zcmaDa^-gNTRvr`f%oL;Y{GyaXjZoI&lEjkKLd{UFl+5CS#FFHU)S^PI)KH$%f)t=c Sd}>*0QDvdFH{;}2Jgoo{D;N;~ delta 65 zcmaDS^L?Wu{G2QpL zRc0!uWma^k!68JVGtt1|rY`^rv@iv*qp^ZrOYIJzIJ)3#G^*#F` z`AX@@@VhfdLtk zJ1{82(G-z#Zgd-lni&mIaWU|i62$<28iq-PNIP&`h7&0g3)3yY(8+v0zxoGPxRHo} zQ(DbD^4V|M>#3eLvBDV{&I&lE9h<`_nG8*;YHiq#$54l{0$gm+V;PeHrI1ppt(d<-;&|w9Slp#T zH<<^QSSY{-EtRM$OpkHXDH?D&n_1IxUw^#+t#mfQVc8OgDR|_x^ za_Lge5&Jb_G#x@gI9xAOAsx2JUb-tqXG3Fx!X#A!RHzNDz*IK#4Ij9|OwRf`X9A|P znS-Xa56_ZiTO}mPH7ylq%x7@p-w*}4Xwfw(%$VI~sQs*og3ie>pQ;jxprAL+hKB2N zaI2x=f((nOq4~dfw*&E%{ziG+zr4#d?s$PrAm@?{cLdzk4k!F@bnlU9*er4^JWKNh zGh(n@+g(aNU1?0U;|i-}Z80_A;eIV7ST}uplcN!`x&3w}hQ)&42Zzl(44PS$f#7tK i#5O!K7XyGr^sA?(XLVQV?}J=`D=))i0c)DCP5cG6d?Ta) delta 932 zcmZWn%Tg0T6rBNiBt(#>C_YgjsGtPop=eaZsBHo@<)SxnY90+UMKUvYx`PH6*@#qj zQm*&}7b;bLfWJWb1^NfLaARrdoldZ{?8Q7z-Fy1pbI$wW-Qic`shFZDmQ(W9xVyoi zF}v47+Y#~u;T93mjY8lUaHvWf5oN-FrtNP-m1fH;6@zlzvyCt`%5i8R$|kg0y5-x< z;&J9hA909L728aR(`I}Z4B>hc(Do7%8XVq&b`3g`L?ww$I8rCHMLEE}PTGL=E3?o= z6ryfHw+1~)(kR*Hp|>xW%RT&sM?Q!mpwFy2vDa_A!pczx`TI3ECg8Y9<4A*}>cS6P zi^BkEplx-i6xfDf98QoJZADDix#b3B4kz<)N;a}}qoA{O7M9^OiPHoY)tc(y8KTlA z1aW1f3}-Xx&x*2y{|b#%mDjSc$YGEqXbU2)oqFnrHk`}Jwwko(on}X>H{_rs!y1f4 zk~C~W(&>qI`q|mPFdFT2L4%6|E}66&sfw=W7ICj2^i9Kb3$7@`SSI~q<4fjU&+~(| z@wPPavrY8^=+4n&^1ef*)UO+)w=GkdVa`k4yim$CMAeuzGEf}m@^DSg z$esm@1%K6yGI!mO`SDj)I1@0RN$(>Q9%;#+s794Hsx-P~)hHGXT{t1*j$g3hW+uIC zj$}W5ee**y1qSD6eVW*kA~ZbPFY2}JZf$=Yd~$nKgF8v-TK4h8+}z|0EF2hhH!^Bb zgL`OHHbAE`PA#vcDY#$D>jMoQVqTrz|AI=Qpnlj@ciIOZMXqKw$O%|7v(qY3GwD5R P^80@Gj(}yeWRCv^_dO(; diff --git a/gensim/test/test_data/ldamodel_python_3_5.bin b/gensim/test/test_data/ldamodel_python_3_5.bin deleted file mode 100644 index a31dbd6199..0000000000 --- a/gensim/test/test_data/ldamodel_python_3_5.bin +++ /dev/null @@ -1 +0,0 @@ -{"0": "interface", "1": "human", "2": "computer", "3": "system", "4": "user", "5": "survey", "6": "time", "7": "response", "8": "eps", "9": "trees", "10": "graph", "11": "minors"} \ No newline at end of file diff --git a/gensim/test/test_data/ldamodel_python_3_5.eta.npy b/gensim/test/test_data/ldamodel_python_3_5.eta.npy deleted file mode 100644 index a5ceb80b2d236cf16d807ef7e63979bed00ebf4b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 96 zcmbR27wQ`j$;jZwP_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= dXCxM+0{I$7ItqrGItsN4aKOa?1`q6EGyo3%7kmH! diff --git a/gensim/test/test_data/ldamodel_python_3_5.state b/gensim/test/test_data/ldamodel_python_3_5.state index 2ead06d02973f21271443993194d36cd54c01bea..bcfdf8c5b279edee13eca0f5ccd1348a06a9c08a 100644 GIT binary patch delta 357 zcmXYs%TB^T7=>HV0wXA1P`qDJbV1@u7e0ZLkl2mgG)+4lX%lQYoq>dvD%C1oQ1WpzB(@r!!SHA<7}9Tc*H&7k9a@zLxG};6{oDn$?$zJ7Fcy* zIYf8jUXo^9ZIxV<9ZU6ukiHbC%pY1GglPpyLO;v=F~J%&Ca5xxr6Fg6@Faf_)ErYO z1FUO8-I@YKbbQ&ME*KTMgxIjuNF qRdea%A|jCvav3RiS2kL?yXpkjQB%R^zghhzN4IU7qOzx5pJf9(|;l5eM#Y=w=31m@ho#NJF{^vap7@;c)ec#Jx+#U kAaLoR#j8b4`292!Xgj!CR*GQ>TsxTGHT4?@w Date: Fri, 9 Dec 2016 07:32:19 +0530 Subject: [PATCH 11/15] Pickling id2word Dictionary separately. Also added test to check equality of id2word dictionary in loading a model --- gensim/models/ldamodel.py | 29 +++++++++++++----- .../test_data/ldamodel_python_2_7.id2word | Bin 0 -> 185 bytes gensim/test/test_data/ldamodel_python_3_5 | Bin 4911 -> 4911 bytes .../test_data/ldamodel_python_3_5.id2word | Bin 0 -> 185 bytes .../test/test_data/ldamodel_python_3_5.state | Bin 825 -> 825 bytes gensim/test/test_ldamodel.py | 8 ++++- 6 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 gensim/test/test_data/ldamodel_python_2_7.id2word create mode 100644 gensim/test/test_data/ldamodel_python_3_5.id2word diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 2b4297a5ca..96ff710b2f 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -43,7 +43,6 @@ from scipy.special import polygamma from six.moves import xrange import six -import json # log(sum(exp(x))) that tries to avoid overflow try: @@ -1025,20 +1024,24 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** """ if self.state is not None: self.state.save(utils.smart_extension(fname, '.state'), *args, **kwargs) - - # make sure 'state' and 'dispatcher' are ignored from the pickled object, even if + # Save the dictionary separately if not in 'ignore'. + id2word = None + if self.id2word is not None and 'id2word' not in ignore: + id2word = dict((k,v) for k,v in self.id2word.iteritems()) + + # make sure 'state', 'ignore' and 'dispatcher' are ignored from the pickled object, even if # someone sets the ignore list themselves if ignore is not None and ignore: if isinstance(ignore, six.string_types): ignore = [ignore] ignore = [e for e in ignore if e] # make sure None and '' are not in the list - ignore = list(set(['state', 'dispatcher']) | set(ignore)) + ignore = list(set(['state', 'dispatcher', 'id2word']) | set(ignore)) else: - ignore = ['state', 'dispatcher'] + ignore = ['state', 'dispatcher', 'id2word'] - # make sure 'expElogbeta', 'sstats' and 'id2word' are ignored from the pickled object, even if + # make sure 'expElogbeta' and 'sstats' are ignored from the pickled object, even if # someone sets the separately list themselves. - separately_explicit = ['expElogbeta', 'sstats', 'id2word'] + separately_explicit = ['expElogbeta', 'sstats'] # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some # array manually. if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or len(self.alpha.shape) != 1: @@ -1053,8 +1056,9 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** separately = list(set(separately_explicit) | set(separately)) else: separately = separately_explicit - super(LdaModel, self).save(fname, ignore=ignore, separately = separately, *args, **kwargs) + # Save the id2word dictionary separately. + utils.pickle(id2word, utils.smart_extension(fname, '.id2word')) @classmethod def load(cls, fname, *args, **kwargs): @@ -1073,5 +1077,14 @@ def load(cls, fname, *args, **kwargs): result.state = super(LdaModel, cls).load(state_fname, *args, **kwargs) except Exception as e: logging.warning("failed to load state from %s: %s", state_fname, e) + id2word_fname = utils.smart_extension(fname, '.id2word') + try: + id2word = utils.unpickle(id2word_fname) + if id2word is not None: + result.id2word = id2word + else: + result.id2word = None + except Exception as e: + logging.warning("failed to load id2word dictionary from %s: %s", state_fname, e) return result # endclass LdaModel diff --git a/gensim/test/test_data/ldamodel_python_2_7.id2word b/gensim/test/test_data/ldamodel_python_2_7.id2word new file mode 100644 index 0000000000000000000000000000000000000000..aadb561e72b9deda016823f5c08d03cfc396fa1b GIT binary patch literal 185 zcmXAiOAf;z3`9vnJ|yiuy2?d-0k$Y|n=Bw0L!~S#_2{+e^1T_&eA^E-4^Y-9rH+s> zJ>7=XLSrW}+v#HwNgcGd7qJ)b0%_=KoRH%g7^w$swZ!@GNCWhC5a5#Dc+m*MA1}`6 qX@ap<3(Ks5$#zo3$TWkg7das^EihMlB^P(P!BRK;5zfSPFY6DeaV?Ah literal 0 HcmV?d00001 diff --git a/gensim/test/test_data/ldamodel_python_3_5 b/gensim/test/test_data/ldamodel_python_3_5 index 489aa41ecfebc3fb1788fea601bd678e08493264..9cc4a10e3a38203e29ebefac28b28b26ce4b25c7 100644 GIT binary patch delta 910 zcmZWo%Tg0T6wL&Ag~(e(QSbp6RH8iOA!vMIls1u?axt4Y)icvDQ)C`@x`PH6*@#qj zrd;s}E)=Wy3^$Ztpnrf1H3|!1(KT~t?&)*RJ-s`+JNh=g&=QNqbloetL778C z0gh>HSYa=NrJ}!PFhQY_Y8_Y#Sg617LrWKa;Fuhms}v}7{}_=ZcX zgL2b}KwArt(2`j81L3&NCetI-wyKuE8rz^noo%qN3`vrZXCoKA6j)R+4#!EWtXU2h zp<|Q;v!K1IHDV_%1~!dDNA1T*rPGuG5}rlFkd_nBMPggfZR)0PF_R~l7d<4PM^muV zfL<-)U8KVCCg8*y=u_e3HuS4-Dn%3$--6SJfR-o+*f*fX@&DC1I74DY*@m+!45mnf zL|cTRzI;Bv{0EoZNKC*vqiXlO``y()blyh%3o2X`Fl=b82ovcOEOa@HkYsIqfQ}a6 zlKf)YC2yUhLy*etH%pEIi>tQfg3-{1d|XCY6V_c{wX z+#rh9g=e{r=eS5eupJ!-S7I=e&Ai_Hmi^fKN{%?Bp5?nb7dSnbt(%yZ#oFh-=W}B! z%%`+wM9Mvzn4h1Tg_}pr7b511D%?Wmxd2CS+pfD_nubi>^*buuMc3`2|LndO*}eBk z*=eubk7PbjVM)M4BR8WEC7andr+yyv>>%+gNv+Z)*%c{}kw-anQo!ZDhR#bRtz zEh%i1`4wm)$KLxAaG?qY zt@Y=L!QSMk3~d>D9AER#CKnc_=O9&2OT*Q69aDRiq+_VQuU3+fCb4bk&=uV^n9h^T z2@XlpLHq4Chm9l4GsT?km){ubc-k@-ZNFjxmU_Qcj9S*}}Dl{pIweGk+GdPSC z;c~=}WlRN}LWWkGFn`42Xy|WP+*5^aG9Shm4HQ^cZCh1@=`(J+B?GSHva4GD`_GTR zX=?)a21EsDZ*%d2o0FTzB~rKfU^*sl?z;SdVK;d-$OlVOSMrMFyi*EJ?o zm{Mtg3bj!yFrCXD-~~sR$z5CHOu$Sod)Tmc;ci=wWg|(RX(`xPWd=8L+1Fb?a-Vu% zg}Ky{XR0u3wws~$b0!KpFT+BnN+g1U-ZUc(*XH3?q~W3rOQ@mwzj(I;@o0DSJ+5Cr zWTP`4AREYeM~1rs?rBGDebl@6NjxkTITpU9l?5|wcu=EhjCVa@+13_QBB;wzoBy*0 zygaOhgzIM6+2H63x%~cdn!w=T_0dsrkAvb?WFXkBB()7s%%uQu3H|Ed;sf`?LVaMpsZ0!?Po}} zV5SZ_J4$fW;3D)pFp>;Iz_|6T8F8uF~<557|1*&-3Bkw{kKPKGKHyA zY*}b_Sh=7*W83KUdar*JMPiD?j`tZ=d7nhzzoi#iZHC>5UfQ#;FLB`UD)q>8Fz^Kq zS-mhMgmk6AQB;YF4;T@-I<|1Ka8+WB2~MLXfq$M_ga61cX}SyqG}r@6q5joYEu{RiM2Wsv{? delta 357 zcmXYs%TB^T7=>HV0wXA1P`qDJbV1@u7e0ZLkl2mgG)+4lX%lQYoq>dvD%C1oQ1WpzB(@r!!SHA<7}9Tc*H&7k9a@zLxG};6{oDn$?$zJ7Fcy* zIYf8jUXo^9ZIxV<9ZU6ukiHbC%pY1GglPpyLO;v=F~J%&Ca5xxr6Fg6@Faf_)ErYO z1FUO8-I@YKbbQ&ME*KTMgxIjuNF qRdea%A|jCvav3RiS2kL?yXpkjQB%R^zghhzN4IU Date: Fri, 9 Dec 2016 07:50:18 +0530 Subject: [PATCH 12/15] Removed commented code --- gensim/test/test_ldamodel.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index d10a350c6b..e57421ccc1 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -87,7 +87,6 @@ def testTransform(self): def testAlphaAuto(self): model1 = self.class_(corpus, id2word=dictionary, alpha='symmetric', passes=10) - logging.warning("id2word type: %s", type(model1.id2word)) modelauto = self.class_(corpus, id2word=dictionary, alpha='auto', passes=10) # did we learn something? @@ -410,14 +409,6 @@ def testPersistence(self): tstvec = [] self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - # # Method used to save LDA models in Python 2.7 and 3.5 environments. - # def testSaveModelsForPythonVersion(self): - # fname = datapath('ldamodel_python_2_7') - # corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - # model = ldamodel.LdaModel(corpus, id2word=dictionary, num_topics=2, passes=100, random_state = 1000007) - # model.save(fname) - # logging.warning("LDA Model saved") - def testModelCompatibilityWithPythonVersions(self): fname_model_2_7 = datapath('ldamodel_python_2_7') model_2_7 = self.class_.load(fname_model_2_7) From dac55bc7668deb40238026085261f27486f1c906 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Fri, 9 Dec 2016 08:11:05 +0530 Subject: [PATCH 13/15] Minor change. --- gensim/test/test_ldamodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index e57421ccc1..46a1fe5e8f 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -418,8 +418,8 @@ def testModelCompatibilityWithPythonVersions(self): self.assertTrue(np.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) tstvec = [] self.assertTrue(np.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector - id2word_2_7 = dict((k,v) for k,v in model_2_7.id2word.iteritems()) - id2word_3_5 = dict((k,v) for k,v in model_3_5.id2word.iteritems()) + id2word_2_7 = dict((k,v) for k,v in model_2_7.id2word.items()) + id2word_3_5 = dict((k,v) for k,v in model_3_5.id2word.items()) self.assertEqual(set(id2word_2_7.keys()), set(id2word_3_5.keys())) From 615b91eedd9fc9ff333bea69db242cba8b22dcf3 Mon Sep 17 00:00:00 2001 From: anmol01gulati Date: Thu, 15 Dec 2016 19:23:28 +0530 Subject: [PATCH 14/15] Changes made --- gensim/models/ldamodel.py | 38 +++++++++--------- .../test_data/ldamodel_python_2_7.id2word | Bin 185 -> 412 bytes gensim/test/test_data/ldamodel_python_3_5 | Bin 4911 -> 4911 bytes .../test_data/ldamodel_python_3_5.id2word | Bin 185 -> 430 bytes .../test/test_data/ldamodel_python_3_5.state | Bin 825 -> 825 bytes gensim/test/test_ldamodel.py | 14 ++++--- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 96ff710b2f..dd0b90c870 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -34,9 +34,11 @@ import logging import numpy as np # for arrays, array broadcasting etc. import numbers +import os from gensim import interfaces, utils, matutils from gensim.models import basemodel +from gensim.corpora import Dictionary from itertools import chain from scipy.special import gammaln, psi # gamma function utils @@ -239,11 +241,11 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, prior directly from your data. `eta` can be a scalar for a symmetric prior over topic/word - distributions, or a vector of shape num_words, which can be used to - impose (user defined) asymmetric priors over the word distribution. + distributions, or a vector of shape num_words, which can be used to + impose (user defined) asymmetric priors over the word distribution. It also supports the special value 'auto', which learns an asymmetric prior over words directly from your data. `eta` can also be a matrix - of shape num_topics x num_words, which can be used to impose + of shape num_topics x num_words, which can be used to impose asymmetric priors over the word distribution on a per-topic basis (can not be learned from data). @@ -1025,11 +1027,10 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** if self.state is not None: self.state.save(utils.smart_extension(fname, '.state'), *args, **kwargs) # Save the dictionary separately if not in 'ignore'. - id2word = None - if self.id2word is not None and 'id2word' not in ignore: - id2word = dict((k,v) for k,v in self.id2word.iteritems()) - - # make sure 'state', 'ignore' and 'dispatcher' are ignored from the pickled object, even if + if 'id2word' not in ignore: + utils.pickle(self.id2word, utils.smart_extension(fname, '.id2word')) + + # make sure 'state', 'id2word' and 'dispatcher' are ignored from the pickled object, even if # someone sets the ignore list themselves if ignore is not None and ignore: if isinstance(ignore, six.string_types): @@ -1042,7 +1043,7 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** # make sure 'expElogbeta' and 'sstats' are ignored from the pickled object, even if # someone sets the separately list themselves. separately_explicit = ['expElogbeta', 'sstats'] - # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some + # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some # array manually. if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or len(self.alpha.shape) != 1: separately_explicit.append('alpha') @@ -1057,9 +1058,7 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** else: separately = separately_explicit super(LdaModel, self).save(fname, ignore=ignore, separately = separately, *args, **kwargs) - # Save the id2word dictionary separately. - utils.pickle(id2word, utils.smart_extension(fname, '.id2word')) - + @classmethod def load(cls, fname, *args, **kwargs): """ @@ -1078,13 +1077,12 @@ def load(cls, fname, *args, **kwargs): except Exception as e: logging.warning("failed to load state from %s: %s", state_fname, e) id2word_fname = utils.smart_extension(fname, '.id2word') - try: - id2word = utils.unpickle(id2word_fname) - if id2word is not None: - result.id2word = id2word - else: - result.id2word = None - except Exception as e: - logging.warning("failed to load id2word dictionary from %s: %s", state_fname, e) + if (os.path.isfile(id2word_fname)): + try: + result.id2word = utils.unpickle(id2word_fname) + except Exception as e: + logging.warning("failed to load id2word dictionary from %s: %s", id2word_fname, e) + else: + result.id2word = None return result # endclass LdaModel diff --git a/gensim/test/test_data/ldamodel_python_2_7.id2word b/gensim/test/test_data/ldamodel_python_2_7.id2word index aadb561e72b9deda016823f5c08d03cfc396fa1b..5fad7912fbaac2ac1564ff3faa0a094b1702478b 100644 GIT binary patch literal 412 zcmYL_%TB~F3`P5H($1Ub0`|jZ7V{14ysp$8Ds5&Gp(b_GAPrI__;XA_Le`P5b?v+G zXVjjBcB((L#tz0dhfcMlGP<#s;`v`<`1FO*gz>&&+V?M=X&olOD>j-pp^vJ=bi!<3 zXF(A3N*n7ihq6v-JlkfF$e@@_cX6ZWkwZ3{qZPto0lbcBRSXVGU^OG%ntt%3u(*Km z_tM*Uals{|bwa*(){)#MQZT? literal 185 zcmXAiOAf;z3`9vnJ|yiuy2?d-0k$Y|n=Bw0L!~S#_2{+e^1T_&eA^E-4^Y-9rH+s> zJ>7=XLSrW}+v#HwNgcGd7qJ)b0%_=KoRH%g7^w$swZ!@GNCWhC5a5#Dc+m*MA1}`6 qX@ap<3(Ks5$#zo3$TWkg7das^EihMlB^P(P!BRK;5zfSPFY6DeaV?Ah diff --git a/gensim/test/test_data/ldamodel_python_3_5 b/gensim/test/test_data/ldamodel_python_3_5 index 9cc4a10e3a38203e29ebefac28b28b26ce4b25c7..0733b35fd50b9c8d67d5b8236c848ba858a740b6 100644 GIT binary patch delta 967 zcmZXSOH9|3~7>+dy5tq-G~f3 zXS#Cd){QLu051Fj=qJE$fLq5IJh=sB#ygYb&dLAWbN|0{wg`$rR*YE=!StJZeB>_oA z+JNJxZu%BAS%P}eCIKl;#ykVsv_|~8SlJte6K|nig^o?=RG}-Q$Vzepx(^PtcqPER zlUfY*%hPa5kra6oPOH$9QR3pXS?Em{3WfQ7T;ioj9MZ;?ozh%Ps(p*4YhPl+`1CKr z85^seRpA_m^M)1y#f$3*p~T>Vk`(NsNF5e~i%N^uhN^&u`b$5wbnXX^$)K;!G$fwo zmvqLFEL^HB#rqcfhg2BIXib=ue=#~UGd=}_cB?IbT(;3=Muj1CnGa$caK)}WD^I}H zy0dF445Kr<_diP`k)_YO@%O2v>ye8aDvWX%GxC$NBImL@=J>C@&i5R~4bd=NcUC+< zq!xpTBHYY~AdW>qxM|Z6Cbb56L*PVwn9oJqH{Z>%=TRivbpSnk^lbl<8M*AK3?|@ z+1!uVJW%0bGy#aF`pE8ydf0*G>g(%=?B5N{MF<|NkmK;g&^qx)$?=>Lwk`;LgBXtM z@CrQ5W#6p-$bIU1t+gMO*p9wN+%kpve}V`H4lAjElFRN&s;G4c;IQw{l$aP@e|ecv h4*6VmPv|5O4G3eDK|!$&C^6FsDh!?%VWDh{`~f%PD$D=? delta 970 zcmZWoNlz3(6rLGm8%B`LU2p*$Tu>H46dm_b%0bdGu`bxD?k?zx)646s9vwBAi%7U> zIC1mpMGpQ0J$fPj0{sVgH75GH2Jv7WD&4R9tM@IhHikBa-i!>iq*5u<^kXlqN@!Ss zU9=4={8@Or94uK}F=%9K$1=UOrd8UFUp(OpFZRqZ3M^)cvQSlME*4%r|6csi^MW?v zFTvuX&0>vLSlQ$i9#tVnvYHEW zGvhGtFvTV8Cat>Y2&p1r#fm%7zDgSr%F3b3pyTbQ!P*{M3(EQqiy~IdLMKTrLzivZ zfx~T?<$m&zg>ITgpangecbO^Sk3#Qj*lWPP71(dUfjmi*%rYF@KG0Fs5OIfS262lQ z;V?;&^a>m?pf67v^l3BDUz?kooBfSTZzQGQsI}^L(=G(n*XMVIod-DPV$0(OoKP@e z(N_GBNLD=ZBn*-q?Zt{|M%<1gDJtBQtisCyb0iEcz)20T-PnI3(M!meHk)U)oRCS0 zjtvt>J1g)&6d3ptPLVO%fQKf&pLX}S+MP2lNDoK;L=~}mlNtU23$ev zr4Zw~>edsAC!kPI=$Zl7F(J4Azi4hG(R}=pS#Pi1OmyBd;I@K0R%tR#(#682J^pj6 zdtE`%(o=@>!>Vw%SonraaM5<$b|$$#gZJvQY?@-p4wanP=_QA}p||;fZ-r|skx^JOCA&Zh7-)Z9T0H~;MUfnihWrVd zvrx9{4~;g1Hua(Hnz7TWHkab<`Xul5m;9N6eU*8h*R{t{e+ot6Owk@*6?0NfeXS^l zU*!u#jl!JjMo@ylH8r{)CQed{uu2$WC-WsPlp*>LFLo3?<%k{J2s>zH1r`?LVaMpsZ0!?Po}} zV5SZ_J4$fW;3D?35i{ei>8@QN81EjPG=xtR&BTGYKz4EFrK{ZZw))s6#}rfh7ZMvMv8xW|(~aA}J;d)Qc~rXKUL zK`njvnTp~&)Th{VurrWg%h7PG{y5$!H21gM#7V9B>D-{dpLD*mWPs3 tF(q*xR>S-g7okpe>7bn@E0Ee0S7A%Tf4HF0u^s4-tev{x*&1q!Y6PNV!E1`G|hB6(k9q)I#ZIaSOIaT@ljmy zbUHLlGMD`4Ki{00g|)E0diAnl7(y|WzbBnZg~!>)D7WK)r76p8tPx`hi+el?1edmm zmj@eFYHFL08nyI3WGaduQJ=wfus%n_!A6&uq%p Date: Thu, 15 Dec 2016 20:54:02 +0530 Subject: [PATCH 15/15] Refactored testfile() function --- gensim/models/ldamodel.py | 1 - gensim/test/test_ldamodel.py | 9 +++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index dd0b90c870..c2324904e3 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -38,7 +38,6 @@ from gensim import interfaces, utils, matutils from gensim.models import basemodel -from gensim.corpora import Dictionary from itertools import chain from scipy.special import gammaln, psi # gamma function utils diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index ae0080c85c..041724dad2 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -44,13 +44,10 @@ corpus = [dictionary.doc2bow(text) for text in texts] -def testfile(test_name=None): +def testfile(test_fname=''): # temporary data will be stored to this file - if test_name is None: - return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') - else: - fname = 'gensim_models_' + test_name + '.tst' - return os.path.join(tempfile.gettempdir(), fname) + fname = 'gensim_models_' + test_fname + '.tst' + return os.path.join(tempfile.gettempdir(), fname) def testRandomState():