Frontend for yields & raises docstrings (#141)

* Frontend for `yields` & `raises` docstrings * returnsTitle * Fix tests * Rm dev change * Fix test * Support md link raise errors too * [dev change should be reverted] * Fix bad git merge * dedup some raise * Revert dev change * Add readme about svelte local dev (#290) * Add raise to readme
huggingface · Sep 5, 2022 · b648e06 · b648e06
1 parent 4391ce4
commit b648e06
Show file tree

Hide file tree

Showing 6 changed files with 118 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -324,6 +324,26 @@ Here's an example for tuple return, comprising several objects:
         - **prediction_scores** (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`) --
           Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
 ```
+Here's an example with `Raises`:
+```
+    Args:
+         config ([`BertConfig`]):
+            Model configuration class with all the parameters of the model.
+
+            Initializing with a config file does not load the weights associated with the model, only the
+            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+
+    Raises:
+        `pa.ArrowInvalidError`: if the arrow data casting fails
+        TypeError: if the target type is not supported, e.g.
+            - point1
+            - point2
+        [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid
+        [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if connection got lost
+
+    Returns:
+        `List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
+```
 
 There are directives for `Added`, `Changed`, & `Deprecated`.
 Here's an example:

diff --git a/kit/preprocess.js b/kit/preprocess.js
@@ -16,6 +16,10 @@ export const docstringPreprocess = {
 		const REGEX_PARAMSGROUPS = /<paramgroups>(((?!<paramgroups>).)*)<\/paramgroups>/ms;
 		const REGEX_RETDESC = /<retdesc>(((?!<retdesc>).)*)<\/retdesc>/ms;
 		const REGEX_RETTYPE = /<rettype>(((?!<rettype>).)*)<\/rettype>/ms;
+		const REGEX_YIELDESC = /<yieldesc>(((?!<yieldesc>).)*)<\/yieldesc>/ms;
+		const REGEX_YIELDTYPE = /<yieldtype>(((?!<yieldtype>).)*)<\/yieldtype>/ms;
+		const REGEX_RAISEDESC = /<raises>(((?!<raises>).)*)<\/raises>/ms;
+		const REGEX_RAISETYPE = /<raisederrors>(((?!<raisederrors>).)*)<\/raisederrors>/ms;
 		const REGEX_SOURCE = /<source>(((?!<source>).)*)<\/source>/ms;
 		const REGEX_TIP = /<Tip( warning={true})?>(((?!<Tip( warning={true})?>).)*)<\/Tip>/gms;
 		const REGEX_CHANGED =
@@ -107,6 +111,30 @@ export const docstringPreprocess = {
 				svelteComponent += ` returnType={${JSON.stringify(code)}} `;
 			}
 
+			if (docstringBody.match(REGEX_YIELDESC)) {
+				const yieldDesc = docstringBody.match(REGEX_YIELDESC)[1];
+				const { code } = await mdsvexPreprocess.markup({ content: yieldDesc, filename });
+				svelteComponent += ` returnDescription={${JSON.stringify(code)}} `;
+			}
+
+			if (docstringBody.match(REGEX_YIELDTYPE)) {
+				const yieldType = docstringBody.match(REGEX_YIELDTYPE)[1];
+				const { code } = await mdsvexPreprocess.markup({ content: yieldType, filename });
+				svelteComponent += ` returnType={${JSON.stringify(code)}} isYield={true} `;
+			}
+
+			if (docstringBody.match(REGEX_RAISEDESC)) {
+				const raiseDesc = docstringBody.match(REGEX_RAISEDESC)[1];
+				const { code } = await mdsvexPreprocess.markup({ content: raiseDesc, filename });
+				svelteComponent += ` raiseDescription={${JSON.stringify(code)}} `;
+			}
+
+			if (docstringBody.match(REGEX_RAISETYPE)) {
+				const raiseType = docstringBody.match(REGEX_RAISETYPE)[1];
+				const { code } = await mdsvexPreprocess.markup({ content: raiseType, filename });
+				svelteComponent += ` raiseType={${JSON.stringify(code)}} `;
+      }
+
 			if (docstringBody.match(REGEX_IS_GETSET_DESC)) {
 				svelteComponent += ` isGetSetDescriptor={true} `;
 			}

diff --git a/kit/src/lib/Docstring.svelte b/kit/src/lib/Docstring.svelte
@@ -19,6 +19,9 @@
 	}[];
 	export let returnDescription: string;
 	export let returnType: string;
+	export let isYield = false;
+	export let raiseDescription: string;
+	export let raiseType: string;
 	export let source: string | undefined = undefined;
 	export let hashlink: string | undefined;
 	export let isGetSetDescriptor = false;
@@ -32,6 +35,8 @@
 			const { name, description } = element;
 			return { ...acc, [name]: description };
 		}, {}) || {};
+	const returnsTitle = isYield ? "Yields" : "Returns";
+	const returnsAnchor = returnsTitle.toLowerCase();
 	const bgHighlightClass = "bg-yellow-50 dark:bg-[#494a3d]";
 
 	onMount(() => {
@@ -141,7 +146,7 @@
 						? 'cursor-pointer'
 						: 'cursor-default'}"
 					on:click|preventDefault|stopPropagation={() =>
-						onClick(`${anchor}.returns`, !!returnDescription)}
+						onClick(`${anchor}.${returnsAnchor}`, !!returnDescription)}
 					>{@html replaceParagraphWithSpan(returnType)}</span
 				>
 			{/if}
@@ -214,19 +219,32 @@
 		{/if}
 		{#if !!returnType}
 			<div
+				id={`${anchor}.${returnsAnchor}`}
 				class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800 rounded {hashlink ===
 				anchor
 					? bgHighlightClass
 					: ''}"
-				id={`${anchor}.returns`}
 			>
-				<p class="text-base">Returns</p>
+				<p class="text-base">{returnsTitle}</p>
 				{#if !!returnType}
 					{@html returnType}
 				{/if}
 				<span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700" />
 			</div>
 			<p class="text-base">{@html returnDescription || ""}</p>
 		{/if}
+		{#if !!raiseType}
+			<div
+				class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800"
+				id={`${anchor}.raises`}
+			>
+				<p class="text-base">Raises</p>
+				{#if !!raiseType}
+					{@html raiseType}
+				{/if}
+				<span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700" />
+			</div>
+			<p class="text-base">{@html raiseDescription || ""}</p>
+		{/if}
 	</div>
 </div>
diff --git a/src/doc_builder/convert_rst_to_mdx.py b/src/doc_builder/convert_rst_to_mdx.py
@@ -343,6 +343,17 @@ def split_return_line(line):
     return ":".join(splits_on_colon[:idx]), ":".join(splits_on_colon[idx:])
 
 
+def split_raise_line(line):
+    """
+    Split the raise line with format `SomeError some doc`.
+    """
+    splits_on_colon = line.strip().split(" ")
+    error_type, doc = splits_on_colon[0], " ".join(splits_on_colon[1:])
+    if error_type and error_type[-1] == ":":
+        error_type = error_type[:-1]
+    return error_type, doc
+
+
 def split_arg_line(line):
     """
     Split the return line with format `type: some doc`. Type may contain colons in the form of :obj: or :class:.
@@ -361,6 +372,7 @@ class InvalidRstDocstringError(ValueError):
 
 
 _re_parameters = re.compile(r"<parameters>(((?!<parameters>).)*)</parameters>", re.DOTALL)
+_re_md_link = re.compile(r"\[(.+)\]\(.+\)", re.DOTALL)
 
 
 def parse_rst_docstring(docstring):
@@ -419,10 +431,15 @@ def parse_rst_docstring(docstring):
                     idx += 1
             else:
                 while idx < len(lines) and find_indent(lines[idx]) == return_indent:
-                    return_type, return_description = split_return_line(lines[idx])
-                    raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"`\1`", return_type)
-                    lines[idx] = "- " + raised_error + " --" + return_description
-                    raised_errors.append(raised_error)
+                    return_type, return_description = split_raise_line(lines[idx])
+                    raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"``\1``", return_type)
+                    lines[idx] = "- " + raised_error + " -- " + return_description
+                    md_link = _re_md_link.match(raised_error)
+                    if md_link:
+                        raised_error = md_link[1]
+                        raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"``\1``", raised_error)
+                    if raised_error not in raised_errors:
+                        raised_errors.append(raised_error)
                     idx += 1
                     while idx < len(lines) and (is_empty_line(lines[idx]) or find_indent(lines[idx]) > return_indent):
                         idx += 1

diff --git a/tests/test_autodoc.py b/tests/test_autodoc.py
@@ -72,9 +72,9 @@
 
 <returntype>            `List[int]`</returntype>
 <raises>
-- `ValuError` -- this value error will be raised on wrong input type.
+- ``ValuError`` -- this value error will be raised on wrong input type.
 </raises>
-<raisederrors>`ValuError`</raisederrors>
+<raisederrors>``ValuError``</raisederrors>
 """
 
 TEST_DOCSTRING_WITH_EXAMPLE = """Constructs a BERTweet tokenizer, using Byte-Pair-Encoding.
@@ -205,7 +205,7 @@ def test_get_signature_component(self):
         ]
         object_doc = TEST_DOCSTRING
         source_link = "test_link"
-        expected_signature_component = '<docstring><name>class transformers.BertweetTokenizer</name><anchor>transformers.BertweetTokenizer</anchor><source>test_link</source><parameters>[{"name": "vocab_file", "val": ""}, {"name": "normalization", "val": " = False"}, {"name": "bos_token", "val": " = \'&amp;lt;s>\'"}]</parameters><paramsdesc>- **vocab_file** (`str`) --\n  Path to the vocabulary file.\n- **merges_file** (`str`) --\n  Path to the merges file.\n- **normalization** (`bool`, _optional_, defaults to `False`) --\n  Whether or not to apply a normalization preprocess.\n\n<Tip>\n\nWhen building a sequence using special tokens, this is not the token that is used for the beginning of\nsequence. The token used is the `cls_token`.\n\n</Tip></paramsdesc><paramgroups>0</paramgroups><rettype>`List[int]`</rettype><retdesc>List of [input IDs](../glossary.html#input-ids) with the appropriate special tokens.</retdesc><raises>- `ValuError` -- this value error will be raised on wrong input type.</raises><raisederrors>`ValuError`</raisederrors></docstring>\nConstructs a BERTweet tokenizer, using Byte-Pair-Encoding.\n\nThis tokenizer inherits from [`~transformers.PreTrainedTokenizer`] which contains most of the main methods.\nUsers should refer to this superclass for more information regarding those methods.\n\n\n\n\n\n\n\n\n'
+        expected_signature_component = '<docstring><name>class transformers.BertweetTokenizer</name><anchor>transformers.BertweetTokenizer</anchor><source>test_link</source><parameters>[{"name": "vocab_file", "val": ""}, {"name": "normalization", "val": " = False"}, {"name": "bos_token", "val": " = \'&amp;lt;s>\'"}]</parameters><paramsdesc>- **vocab_file** (`str`) --\n  Path to the vocabulary file.\n- **merges_file** (`str`) --\n  Path to the merges file.\n- **normalization** (`bool`, _optional_, defaults to `False`) --\n  Whether or not to apply a normalization preprocess.\n\n<Tip>\n\nWhen building a sequence using special tokens, this is not the token that is used for the beginning of\nsequence. The token used is the `cls_token`.\n\n</Tip></paramsdesc><paramgroups>0</paramgroups><rettype>`List[int]`</rettype><retdesc>List of [input IDs](../glossary.html#input-ids) with the appropriate special tokens.</retdesc><raises>- ``ValuError`` -- this value error will be raised on wrong input type.</raises><raisederrors>``ValuError``</raisederrors></docstring>\nConstructs a BERTweet tokenizer, using Byte-Pair-Encoding.\n\nThis tokenizer inherits from [`~transformers.PreTrainedTokenizer`] which contains most of the main methods.\nUsers should refer to this superclass for more information regarding those methods.\n\n\n\n\n\n\n\n\n'
         self.assertEqual(
             get_signature_component(name, anchor, signature, object_doc, source_link), expected_signature_component
         )

diff --git a/tests/test_convert_rst_to_mdx.py b/tests/test_convert_rst_to_mdx.py
@@ -51,6 +51,7 @@
     remove_indent,
     split_arg_line,
     split_pt_tf_code_blocks,
+    split_raise_line,
     split_return_line,
 )
 
@@ -473,6 +474,23 @@ def test_split_return_line(self):
         self.assertEqual(split_return_line(":class:`IterableDataset`"), (":class:`IterableDataset`", ""))
         self.assertEqual(split_return_line("`int`"), ("`int`", ""))
 
+    def test_split_raise_line(self):
+        self.assertEqual(split_raise_line("SomeError some error"), ("SomeError", "some error"))
+        self.assertEqual(split_raise_line("SomeError: some error"), ("SomeError", "some error"))
+        self.assertEqual(
+            split_raise_line("[SomeError](https:://someurl): some error"),
+            ("[SomeError](https:://someurl)", "some error"),
+        )
+        self.assertEqual(
+            split_raise_line(
+                "[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid"
+            ),
+            (
+                "[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError)",
+                "if credentials are invalid",
+            ),
+        )
+
     def test_split_arg_line(self):
         self.assertEqual(split_arg_line("   x (:obj:`int`): an int"), ("   x (:obj:`int`)", " an int"))
         self.assertEqual(split_arg_line("   x (:obj:`int`)"), ("   x (:obj:`int`)", ""))
@@ -492,6 +510,8 @@ def test_parse_rst_docsting(self):
     TypeError: if the target type is not supported according, e.g.
         - point1
         - point2
+    [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid
+    [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if connection got lost
 
 Returns:
     :obj:`str` or :obj:`bool`: some result
@@ -515,14 +535,16 @@ def test_parse_rst_docsting(self):
 
 <raises>
 
-- `pa.ArrowInvalidError` -- if the arrow data casting fails
-- `TypeError` -- if the target type is not supported according, e.g.
+- ``pa.ArrowInvalidError`` -- if the arrow data casting fails
+- ``TypeError`` -- if the target type is not supported according, e.g.
         - point1
         - point2
+- [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) -- if credentials are invalid
+- [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) -- if connection got lost
 
 </raises>
 
-<raisederrors>`pa.ArrowInvalidError` or `TypeError`</raisederrors>
+<raisederrors>``pa.ArrowInvalidError`` or ``TypeError`` or ``HTTPError``</raisederrors>
 
 <returns>