Skip to content

Commit

Permalink
Add double quote in command line to run on Windows (#1005)
Browse files Browse the repository at this point in the history
replace single quotes with double quotes
  • Loading branch information
Akarazeev authored and tmylk committed Nov 10, 2016
1 parent ed94462 commit d7c1055
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions gensim/models/wrappers/ldamallet.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True):
self.corpus2mallet(corpus, fout)

# convert the text file above into MALLET's internal format
cmd = self.mallet_path + " import-file --preserve-case --keep-sequence --remove-stopwords --token-regex '\S+' --input %s --output %s"
cmd = self.mallet_path + ' import-file --preserve-case --keep-sequence --remove-stopwords --token-regex "\S+" --input %s --output %s'
if infer:
cmd += ' --use-pipe-from ' + self.fcorpusmallet()
cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet() + '.infer')
Expand All @@ -166,7 +166,7 @@ def train(self, corpus):
logger.info("training MALLET LDA with %s", cmd)
check_output(cmd, shell=True)
self.word_topics = self.load_word_topics()
# NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
# NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
# word_topics has replaced wordtopics throughout the code; wordtopics just stores the values of word_topics when train is called.
self.wordtopics = self.word_topics

Expand Down Expand Up @@ -260,20 +260,20 @@ def get_version(self, direc_path):
Check version of mallet via jar file
"""
archive = zipfile.ZipFile(direc_path, 'r')
if u'cc/mallet/regression/' not in archive.namelist():
if u'cc/mallet/regression/' not in archive.namelist():
return '2.0.7'
else:
return '2.0.8RC3'
except Exception:

xml_path = direc_path.split("bin")[0]
try:
doc = et.parse(xml_path + "pom.xml").getroot()
namespace = doc.tag[:doc.tag.index('}') + 1]
return doc.find(namespace + 'version').text.split("-")[0]
except Exception:
return "Can't parse pom.xml version file"



def read_doctopics(self, fname, eps=1e-6, renorm=True):
Expand Down Expand Up @@ -304,7 +304,7 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True):
if mallet_version == "2.0.7":
"""
1 1 0 1.0780612802674239 30.005575655428533364 2 0.005575655428533364 1 0.005575655428533364
1 1 0 1.0780612802674239 30.005575655428533364 2 0.005575655428533364 1 0.005575655428533364
2 2 0 0.9184413079632608 40.009062076892971008 3 0.009062076892971008 2 0.009062076892971008 1 0.009062076892971008
In the above example there is a mix of the above if and elif statement. There are neither `2*num_topics` nor `num_topics` elements.
It has 2 formats 40.009062076892971008 and 0 1.0780612802674239 which cannot be handled by above if elif.
Expand All @@ -316,14 +316,14 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True):
doc = []
if len(parts) > 0:
while count < len(parts):
"""
"""
if section is to deal with formats of type 2 0.034
so if count reaches index of 2 and since int(2) == float(2) so if block is executed
now there is one extra element after 2, so count + 1 access should not give an error
else section handles formats of type 20.034
now count is there on index of 20.034 since float(20.034) != int(20.034) so else block
is executed
is executed
"""
if float(parts[count]) == int(parts[count]):
Expand Down

0 comments on commit d7c1055

Please sign in to comment.