Document bleicorpus in Numpy style

VaiyeBe · Nov 23, 2017 · 04c2e61 · 04c2e61
1 parent 3b55037
commit 04c2e61
Showing 1 changed file with 52 additions and 29 deletions.
diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py
@@ -5,9 +5,7 @@
 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
 
 
-"""
-Blei's LDA-C format.
-"""
+"""Blei's LDA-C format."""
 
 from __future__ import with_statement
 
@@ -41,11 +39,18 @@ def __init__(self, fname, fname_vocab=None):
         """
         Initialize the corpus from a file.
 
-        Args:
-            fname (str): serialized corpus's filename
-            fname_vocab (:obj:`str`, optional): vocabulary file; takes precedence over fname.vocab
-        Raises:
-            IOError: If vocabulary file doesn't exist
+        Parameters
+        ----------
+        fname : str
+            Serialized corpus's filename
+        fname_vocab : str or None, optional
+            Vocabulary file; takes precedence over `fname.vocab`
+
+        Raises
+        ------
+        IOError
+            If vocabulary file doesn't exist
+
         """
         IndexedCorpus.__init__(self, fname)
         logger.info("loading corpus from %s", fname)
@@ -70,9 +75,7 @@ def __init__(self, fname, fname_vocab=None):
         self.id2word = dict(enumerate(words))
 
     def __iter__(self):
-        """
-        Iterate over the corpus, returning one sparse vector at a time.
-        """
+        """Iterate over the corpus, returning one sparse vector at a time."""
         lineno = -1
         with utils.smart_open(self.fname) as fin:
             for lineno, line in enumerate(fin):
@@ -81,12 +84,20 @@ def __iter__(self):
 
     def line2doc(self, line):
         """
-        Args:
-            line (str): document's string representation
-        Returns:
-            :obj:`list` of (:obj:`int`, :obj:`float`):
-                document's list representation
-        Raises:
+        Convert line to document.
+
+        Parameters
+        ----------
+        line : str
+            Document's string representation
+
+        Returns
+        -------
+        list of (int, float)
+            Document's list representation
+
+        Raises
+        ------
             ValueError: If format is invalid
         """
         parts = utils.to_unicode(line).split()
@@ -104,14 +115,21 @@ def _save_corpus(fname, corpus, id2word=None, metadata=False):
         There are actually two files saved: `fname` and `fname.vocab`, where
         `fname.vocab` is the vocabulary file.
 
-        Args:
-            fname (str): filename
-            corpus : yields documents
-            id2word (:obj:`dict` of (:obj:`str`, :obj:`str`), optional):
-                transforms id to word
-            metadata (bool): any additional info
-        Returns:
-            :obj:`list` of :obj:`int`: fields' offsets
+        Parameters
+        ----------
+        fname : str
+            Filename
+        corpus : iterable
+            Iterable of documents
+        id2word : dict of (int, str), optional
+            Transforms id to word
+        metadata : bool
+            Any additional info
+
+        Returns
+        -------
+        list of int
+            Fields' offsets
         """
         if id2word is None:
             logger.info("no word id mapping provided; initializing from corpus")
@@ -142,10 +160,15 @@ def docbyoffset(self, offset):
         """
         Return document corresponding to `offset`.
 
-        Args:
-            offset (int): position of the document in the file
-        Returns:
-            :obj:`list` of (:obj:`int`, :obj:`float`): document's list representation
+        Parameters
+        ----------
+        offset : int
+            Position of the document in the file
+
+        Returns
+        -------
+        list of (int, float)
+            Document's list representation
         """
         with utils.smart_open(self.fname) as f:
             f.seek(offset)