From cde582ec204604c96e5dad60392f5d88d7bc8cbe Mon Sep 17 00:00:00 2001
From: Timofey Yefimov <anotherbugmaster@gmail.com>
Date: Mon, 2 Oct 2017 14:42:31 +0500
Subject: [PATCH] Make `save_corpus` private

---
Reviewer notes (text between `---` and the diffstat is ignored by `git am`):

* The private name is `_save_corpus` (single underscore). A leading double
  underscore is name-mangled inside class bodies to `_ClassName__save_corpus`,
  so calls written inside methods, such as `serializer.__save_corpus(...)` or
  `corpora.BleiCorpus.__save_corpus(...)`, would look up a differently-named
  attribute and fail with AttributeError.
* The new base-class `serialize` in gensim/interfaces.py is a classmethod that
  forwards to `_save_corpus` instead of being a silent `pass` stub.
* TODO(review): any `serialize` implementation or other caller that is not
  touched by this patch and still references `save_corpus` must be switched to
  `_save_corpus` as well -- verify before merging.

 gensim/corpora/bleicorpus.py       | 17 +++++++++++------
 gensim/corpora/lowcorpus.py        |  2 +-
 gensim/corpora/malletcorpus.py     |  2 +-
 gensim/corpora/mmcorpus.py         |  2 +-
 gensim/corpora/sharded_corpus.py   |  4 ++--
 gensim/corpora/svmlightcorpus.py   |  2 +-
 gensim/corpora/ucicorpus.py        |  2 +-
 gensim/interfaces.py               | 15 +++++++++++++--
 gensim/models/wrappers/dtmmodel.py |  2 +-
 gensim/test/test_miislita.py       |  2 +-
 10 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py
index 6bd96da716..273759aca6 100644
--- a/gensim/corpora/bleicorpus.py
+++ b/gensim/corpora/bleicorpus.py
@@ -5,9 +5,7 @@
 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
 
 
-"""
-Blei's LDA-C format.
-"""
+"""Blei's LDA-C format."""
 
 from __future__ import with_statement
 
@@ -41,8 +39,9 @@ def __init__(self, fname, fname_vocab=None):
         """
         Initialize the corpus from a file.
 
-        `fname_vocab` is the file with vocabulary; if not specified, it defaults to
-        `fname.vocab`.
+        Args:
+            fname (str): filename of the serialized corpus
+            fname_vocab (str, optional): vocabulary file; if not specified, defaults to `fname.vocab`
         """
         IndexedCorpus.__init__(self, fname)
         logger.info("loading corpus from %s", fname)
@@ -85,7 +84,7 @@ def line2doc(self, line):
         return doc
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, metadata=False):
         """
         Save a corpus in the LDA-C format.
 
@@ -94,6 +93,12 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
 
         This function is automatically called by `BleiCorpus.serialize`; don't
         call it directly, call `serialize` instead.
+
+        Args:
+            fname (str): filename to save the corpus to
+            corpus: corpus to save
+            id2word (optional): word id mapping; if None, it is initialized from `corpus`
+            metadata (bool): part of the common `_save_corpus` signature (TODO: confirm its effect here)
         """
         if id2word is None:
             logger.info("no word id mapping provided; initializing from corpus")
diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py
index e293c998a1..49de7fb9cf 100644
--- a/gensim/corpora/lowcorpus.py
+++ b/gensim/corpora/lowcorpus.py
@@ -141,7 +141,7 @@ def __iter__(self):
                     yield self.line2doc(line)
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, metadata=False):
         """
         Save a corpus in the List-of-words format.
 
diff --git a/gensim/corpora/malletcorpus.py b/gensim/corpora/malletcorpus.py
index cacf0074bd..b6dc482dcc 100644
--- a/gensim/corpora/malletcorpus.py
+++ b/gensim/corpora/malletcorpus.py
@@ -67,7 +67,7 @@ def line2doc(self, line):
             return doc
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, metadata=False):
         """
         Save a corpus in the Mallet format.
 
diff --git a/gensim/corpora/mmcorpus.py b/gensim/corpora/mmcorpus.py
index 2158f0a526..1eaadfb332 100644
--- a/gensim/corpora/mmcorpus.py
+++ b/gensim/corpora/mmcorpus.py
@@ -38,7 +38,7 @@ def __iter__(self):
             yield doc  # get rid of doc id, return the sparse vector only
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False):
         """
         Save a corpus in the Matrix Market format to disk.
 
diff --git a/gensim/corpora/sharded_corpus.py b/gensim/corpora/sharded_corpus.py
index 049e22f226..c0fdbfa409 100644
--- a/gensim/corpora/sharded_corpus.py
+++ b/gensim/corpora/sharded_corpus.py
@@ -773,7 +773,7 @@ def load(cls, fname, mmap=None):
         return super(ShardedCorpus, cls).load(fname, mmap)
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False, **kwargs):
+    def _save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False, **kwargs):
         """
         Implement a serialization interface. Do not call directly;
         use the `serialize` method instead.
@@ -809,4 +809,4 @@ def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progres
         Ignore the parameters id2word, index_fname, progress_cnt, labels
         and metadata. They currently do nothing and are here only to
         provide a compatible method signature with superclass."""
-        serializer.save_corpus(fname, corpus, id2word=id2word, progress_cnt=progress_cnt, metadata=metadata, **kwargs)
+        serializer._save_corpus(fname, corpus, id2word=id2word, progress_cnt=progress_cnt, metadata=metadata, **kwargs)
diff --git a/gensim/corpora/svmlightcorpus.py b/gensim/corpora/svmlightcorpus.py
index c19aa321e2..0b43792ece 100644
--- a/gensim/corpora/svmlightcorpus.py
+++ b/gensim/corpora/svmlightcorpus.py
@@ -79,7 +79,7 @@ def __iter__(self):
         self.length = lineno + 1
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, labels=False, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, labels=False, metadata=False):
         """
         Save a corpus in the SVMlight format.
 
diff --git a/gensim/corpora/ucicorpus.py b/gensim/corpora/ucicorpus.py
index a8911ee07f..995ce3e6ad 100644
--- a/gensim/corpora/ucicorpus.py
+++ b/gensim/corpora/ucicorpus.py
@@ -192,7 +192,7 @@ def create_dictionary(self):
         return dictionary
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False):
         """
         Save a corpus in the UCI Bag-of-Words format.
 
diff --git a/gensim/interfaces.py b/gensim/interfaces.py
index 6cc7e8d872..8c831fd40f 100644
--- a/gensim/interfaces.py
+++ b/gensim/interfaces.py
@@ -74,14 +74,14 @@ def __len__(self):
 #        return sum(1 for doc in self) # sum(empty generator) == 0, so this works even for an empty corpus
 
     @staticmethod
-    def save_corpus(fname, corpus, id2word=None, metadata=False):
+    def _save_corpus(fname, corpus, id2word=None, metadata=False):
         """
         Save an existing `corpus` to disk.
 
         Some formats also support saving the dictionary (`feature_id->word` mapping),
         which can in this case be provided by the optional `id2word` parameter.
 
-        >>> MmCorpus.save_corpus('file.mm', corpus)
+        >>> MmCorpus._save_corpus('file.mm', corpus)
 
         Some corpora also support an index of where each document begins, so
         that the documents on disk can be accessed in O(1) time (see the
@@ -103,6 +103,17 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
                 fmt = str(doc)  # format the document appropriately...
                 fout.write(utils.to_utf8("%s\n" % fmt))  # serialize the formatted document to disk
 
+    @classmethod
+    def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None,
+                  metadata=False):
+        """
+        Save `corpus` to `fname`; this is the public entry point for `_save_corpus`.
+
+        Only `fname`, `corpus`, `id2word` and `metadata` are forwarded to
+        `serializer._save_corpus`. `index_fname`, `progress_cnt` and `labels` are
+        accepted for signature compatibility with subclasses and are ignored here.
+        """
+        serializer._save_corpus(fname, corpus, id2word=id2word, metadata=metadata)
+
 
 class TransformedCorpus(CorpusABC):
     def __init__(self, obj, corpus, chunksize=None, **kwargs):
diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py
index 8bbadfc663..3eea2ab651 100644
--- a/gensim/models/wrappers/dtmmodel.py
+++ b/gensim/models/wrappers/dtmmodel.py
@@ -176,7 +176,7 @@ def convert_input(self, corpus, time_slices):
         """
         logger.info("serializing temporary corpus to %s", self.fcorpustxt())
         # write out the corpus in a file format that DTM understands:
-        corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus)
+        corpora.BleiCorpus._save_corpus(self.fcorpustxt(), corpus)
 
         with utils.smart_open(self.ftimeslices(), 'wb') as fout:
             fout.write(utils.to_utf8(str(len(self.time_slices)) + "\n"))
diff --git a/gensim/test/test_miislita.py b/gensim/test/test_miislita.py
index 344da1adb3..5863fc9f65 100644
--- a/gensim/test/test_miislita.py
+++ b/gensim/test/test_miislita.py
@@ -56,7 +56,7 @@ def test_textcorpus(self):
 
         # make sure serializing works
         ftmp = get_tmpfile('test_textcorpus.mm')
-        corpora.MmCorpus.save_corpus(ftmp, miislita)
+        corpora.MmCorpus._save_corpus(ftmp, miislita)
         self.assertTrue(os.path.exists(ftmp))
 
         # make sure deserializing gives the same result