-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor code with PEP8 and additional limitations. Fix #1521 #1569
Changes from 1 commit
394913d
62e92fc
6c6213b
f34e7b3
ab34c2e
7bbd7a6
bb1a07b
d7bc17a
49a3bfb
630c390
aaeda8e
7e94ce0
9d2e473
a242e06
cccf8d9
a6370af
c9b138f
614fb7c
2a72161
b28b6c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -436,7 +436,7 @@ def testPasses(self): | |
for test_rhot in test_rhots: | ||
model.update(corpus, author2doc) | ||
|
||
msg = ", ".join(map(str, [passes, model.num_updates, model.state.numdocs])) | ||
msg = ", ".join(str(x) for x in [passes, model.num_updates, model.state.numdocs]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about this alternative? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed — fixed. |
||
self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg) | ||
|
||
self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,7 @@ def testPipeline(self): | |
uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') | ||
cache = pickle.loads(uncompressed_content) | ||
data = cache | ||
id2word = Dictionary(map(lambda x: x.split(), data.data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
corpus = [id2word.doc2bow(i.split()) for i in data.data] | ||
numpy.random.mtrand.RandomState(1) # set seed for getting same result | ||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
|
@@ -280,7 +280,7 @@ def testPipeline(self): | |
uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') | ||
cache = pickle.loads(uncompressed_content) | ||
data = cache | ||
id2word = Dictionary(map(lambda x: x.split(), data.data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
corpus = [id2word.doc2bow(i.split()) for i in data.data] | ||
numpy.random.mtrand.RandomState(1) # set seed for getting same result | ||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
|
@@ -363,7 +363,7 @@ def testPipeline(self): | |
data = cache | ||
test_data = data.data[0:2] | ||
test_target = data.target[0:2] | ||
id2word = Dictionary(map(lambda x: x.split(), test_data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks — fixed. |
||
corpus = [id2word.doc2bow(i.split()) for i in test_data] | ||
model = LdaSeqTransformer(id2word=id2word, num_topics=2, time_slice=[1, 1, 1], initialize='gensim') | ||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
|
@@ -433,7 +433,7 @@ def testPipeline(self): | |
uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') | ||
cache = pickle.loads(uncompressed_content) | ||
data = cache | ||
id2word = Dictionary(map(lambda x: x.split(), data.data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
corpus = [id2word.doc2bow(i.split()) for i in data.data] | ||
numpy.random.mtrand.RandomState(1) # set seed for getting same result | ||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
|
@@ -517,8 +517,8 @@ def testPipeline(self): | |
('calculus', 'mathematics'), ('mathematical', 'mathematics'), ('geometry', 'mathematics'), ('operations', 'mathematics'), ('curves', 'mathematics'), | ||
('natural', 'physics'), ('nuclear', 'physics'), ('science', 'physics'), ('electromagnetism', 'physics'), ('natural', 'physics') | ||
] | ||
train_input = list(map(lambda x: x[0], train_data)) | ||
train_target = list(map(lambda x: class_dict[x[1]], train_data)) | ||
train_input = [x[0] for x in train_data] | ||
train_target = [class_dict[x[1]] for x in train_data] | ||
|
||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
clf.fit(model.transform(train_input), train_target) | ||
|
@@ -682,8 +682,8 @@ def testPipeline(self): | |
(['calculus', 'mathematical'], 'mathematics'), (['geometry', 'operations', 'curves'], 'mathematics'), | ||
(['natural', 'nuclear'], 'physics'), (['science', 'electromagnetism', 'natural'], 'physics') | ||
] | ||
train_input = list(map(lambda x: x[0], train_data)) | ||
train_target = list(map(lambda x: class_dict[x[1]], train_data)) | ||
train_input = [x[0] for x in train_data] | ||
train_target = [class_dict[x[1]] for x in train_data] | ||
|
||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
clf.fit(model.transform(train_input), train_target) | ||
|
@@ -737,7 +737,7 @@ def testTransform(self): | |
doc = ['computer system interface time computer system'] | ||
bow_vec = self.model.transform(doc)[0] | ||
expected_values = [1, 1, 2, 2] # comparing only the word-counts | ||
values = list(map(lambda x: x[1], bow_vec)) | ||
values = [x[1] for x in bow_vec] | ||
self.assertEqual(sorted(expected_values), sorted(values)) | ||
|
||
def testSetGetParams(self): | ||
|
@@ -815,7 +815,7 @@ def testPipeline(self): | |
uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') | ||
cache = pickle.loads(uncompressed_content) | ||
data = cache | ||
id2word = Dictionary(map(lambda x: x.split(), data.data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
corpus = [id2word.doc2bow(i.split()) for i in data.data] | ||
tfidf_model = TfIdfTransformer() | ||
tfidf_model.fit(corpus) | ||
|
@@ -881,7 +881,7 @@ def testPipeline(self): | |
uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') | ||
cache = pickle.loads(uncompressed_content) | ||
data = cache | ||
id2word = Dictionary(map(lambda x: x.split(), data.data)) | ||
id2word = Dictionary([x.split() for x in data.data]) | ||
corpus = [id2word.doc2bow(i.split()) for i in data.data] | ||
model = HdpTransformer(id2word=id2word) | ||
clf = linear_model.LogisticRegression(penalty='l2', C=0.1) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@menshikh-iv
line 308:
line 310:
How about the code above?
The code above uses fewer loops.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, accepted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@zsef123 Both examples above are incorrect. The correct formatting using hanging indent:
Actually, this line not too long, so simple:
would work too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
already done as your last variant @piskvorky