Skip to content

Commit

Permalink
Frontend for yields & raises docstrings (#141)
Browse files Browse the repository at this point in the history
* Frontend for `yields` & `raises` docstrings

* returnsTitle

* Fix tests

* Rm dev change

* Fix test

* Support md link raise errors too

* [dev change should be reverted]

* Fix bad git merge

* dedup some raise

* Revert dev change

* Add readme about svelte local dev (#290)

* Add raise to readme
  • Loading branch information
mishig25 authored Sep 5, 2022
1 parent 4391ce4 commit b648e06
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 13 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,26 @@ Here's an example for tuple return, comprising several objects:
- **prediction_scores** (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`) --
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
```
Here's an example with `Raises`:
```
Args:
config ([`BertConfig`]):
Model configuration class with all the parameters of the model.
Initializing with a config file does not load the weights associated with the model, only the
configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
Raises:
`pa.ArrowInvalidError`: if the arrow data casting fails
TypeError: if the target type is not supported according, e.g.
- point1
- point2
[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid
[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if connection got lost
Returns:
`List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
```

There are directives for `Added`, `Changed`, & `Deprecated`.
Here's an example:
Expand Down
28 changes: 28 additions & 0 deletions kit/preprocess.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ export const docstringPreprocess = {
const REGEX_PARAMSGROUPS = /<paramgroups>(((?!<paramgroups>).)*)<\/paramgroups>/ms;
const REGEX_RETDESC = /<retdesc>(((?!<retdesc>).)*)<\/retdesc>/ms;
const REGEX_RETTYPE = /<rettype>(((?!<rettype>).)*)<\/rettype>/ms;
const REGEX_YIELDESC = /<yieldesc>(((?!<yieldesc>).)*)<\/yieldesc>/ms;
const REGEX_YIELDTYPE = /<yieldtype>(((?!<yieldtype>).)*)<\/yieldtype>/ms;
const REGEX_RAISEDESC = /<raises>(((?!<raises>).)*)<\/raises>/ms;
const REGEX_RAISETYPE = /<raisederrors>(((?!<raisederrors>).)*)<\/raisederrors>/ms;
const REGEX_SOURCE = /<source>(((?!<source>).)*)<\/source>/ms;
const REGEX_TIP = /<Tip( warning={true})?>(((?!<Tip( warning={true})?>).)*)<\/Tip>/gms;
const REGEX_CHANGED =
Expand Down Expand Up @@ -107,6 +111,30 @@ export const docstringPreprocess = {
svelteComponent += ` returnType={${JSON.stringify(code)}} `;
}

if (docstringBody.match(REGEX_YIELDESC)) {
const yieldDesc = docstringBody.match(REGEX_YIELDESC)[1];
const { code } = await mdsvexPreprocess.markup({ content: yieldDesc, filename });
svelteComponent += ` returnDescription={${JSON.stringify(code)}} `;
}

if (docstringBody.match(REGEX_YIELDTYPE)) {
const yieldType = docstringBody.match(REGEX_YIELDTYPE)[1];
const { code } = await mdsvexPreprocess.markup({ content: yieldType, filename });
svelteComponent += ` returnType={${JSON.stringify(code)}} isYield={true} `;
}

if (docstringBody.match(REGEX_RAISEDESC)) {
const raiseDesc = docstringBody.match(REGEX_RAISEDESC)[1];
const { code } = await mdsvexPreprocess.markup({ content: raiseDesc, filename });
svelteComponent += ` raiseDescription={${JSON.stringify(code)}} `;
}

if (docstringBody.match(REGEX_RAISETYPE)) {
const raiseType = docstringBody.match(REGEX_RAISETYPE)[1];
const { code } = await mdsvexPreprocess.markup({ content: raiseType, filename });
svelteComponent += ` raiseType={${JSON.stringify(code)}} `;
}

if (docstringBody.match(REGEX_IS_GETSET_DESC)) {
svelteComponent += ` isGetSetDescriptor={true} `;
}
Expand Down
24 changes: 21 additions & 3 deletions kit/src/lib/Docstring.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
}[];
export let returnDescription: string;
export let returnType: string;
export let isYield = false;
export let raiseDescription: string;
export let raiseType: string;
export let source: string | undefined = undefined;
export let hashlink: string | undefined;
export let isGetSetDescriptor = false;
Expand All @@ -32,6 +35,8 @@
const { name, description } = element;
return { ...acc, [name]: description };
}, {}) || {};
const returnsTitle = isYield ? "Yields" : "Returns";
const returnsAnchor = returnsTitle.toLowerCase();
const bgHighlightClass = "bg-yellow-50 dark:bg-[#494a3d]";
onMount(() => {
Expand Down Expand Up @@ -141,7 +146,7 @@
? 'cursor-pointer'
: 'cursor-default'}"
on:click|preventDefault|stopPropagation={() =>
onClick(`${anchor}.returns`, !!returnDescription)}
onClick(`${anchor}.${returnsAnchor}`, !!returnDescription)}
>{@html replaceParagraphWithSpan(returnType)}</span
>
{/if}
Expand Down Expand Up @@ -214,19 +219,32 @@
{/if}
{#if !!returnType}
<div
id={`${anchor}.${returnsAnchor}`}
class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800 rounded {hashlink ===
anchor
? bgHighlightClass
: ''}"
id={`${anchor}.returns`}
>
<p class="text-base">Returns</p>
<p class="text-base">{returnsTitle}</p>
{#if !!returnType}
{@html returnType}
{/if}
<span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700" />
</div>
<p class="text-base">{@html returnDescription || ""}</p>
{/if}
{#if !!raiseType}
<div
class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800"
id={`${anchor}.raises`}
>
<p class="text-base">Raises</p>
{#if !!raiseType}
{@html raiseType}
{/if}
<span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700" />
</div>
<p class="text-base">{@html raiseDescription || ""}</p>
{/if}
</div>
</div>
25 changes: 21 additions & 4 deletions src/doc_builder/convert_rst_to_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,17 @@ def split_return_line(line):
return ":".join(splits_on_colon[:idx]), ":".join(splits_on_colon[idx:])


def split_raise_line(line):
    """
    Separate one entry of a `Raises` section, written as `SomeError some doc`, into its two parts.

    Surrounding whitespace is removed first. The error type is the text before the first space, with a single
    trailing colon dropped when present; the doc is everything after that first space (empty when the line holds
    only an error type).

    Returns a tuple `(error_type, doc)`.
    """
    error_type, _, doc = line.strip().partition(" ")
    # Accept both `SomeError: doc` and `SomeError doc`: only the last character is checked, so colons inside
    # the error type (for instance in a Markdown link URL) are left alone.
    if error_type.endswith(":"):
        error_type = error_type[:-1]
    return error_type, doc


def split_arg_line(line):
"""
Split the arg line with format `name (type): some doc`. Type may contain colons in the form of :obj: or :class:.
Expand All @@ -361,6 +372,7 @@ class InvalidRstDocstringError(ValueError):


_re_parameters = re.compile(r"<parameters>(((?!<parameters>).)*)</parameters>", re.DOTALL)
_re_md_link = re.compile(r"\[(.+)\]\(.+\)", re.DOTALL)


def parse_rst_docstring(docstring):
Expand Down Expand Up @@ -419,10 +431,15 @@ def parse_rst_docstring(docstring):
idx += 1
else:
while idx < len(lines) and find_indent(lines[idx]) == return_indent:
return_type, return_description = split_return_line(lines[idx])
raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"`\1`", return_type)
lines[idx] = "- " + raised_error + " --" + return_description
raised_errors.append(raised_error)
return_type, return_description = split_raise_line(lines[idx])
raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"``\1``", return_type)
lines[idx] = "- " + raised_error + " -- " + return_description
md_link = _re_md_link.match(raised_error)
if md_link:
raised_error = md_link[1]
raised_error = re.sub(r"^\s*`?([\w\.]*)`?$", r"``\1``", raised_error)
if raised_error not in raised_errors:
raised_errors.append(raised_error)
idx += 1
while idx < len(lines) and (is_empty_line(lines[idx]) or find_indent(lines[idx]) > return_indent):
idx += 1
Expand Down
6 changes: 3 additions & 3 deletions tests/test_autodoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@
<returntype> `List[int]`</returntype>
<raises>
- `ValuError` -- this value error will be raised on wrong input type.
- ``ValuError`` -- this value error will be raised on wrong input type.
</raises>
<raisederrors>`ValuError`</raisederrors>
<raisederrors>``ValuError``</raisederrors>
"""

TEST_DOCSTRING_WITH_EXAMPLE = """Constructs a BERTweet tokenizer, using Byte-Pair-Encoding.
Expand Down Expand Up @@ -205,7 +205,7 @@ def test_get_signature_component(self):
]
object_doc = TEST_DOCSTRING
source_link = "test_link"
expected_signature_component = '<docstring><name>class transformers.BertweetTokenizer</name><anchor>transformers.BertweetTokenizer</anchor><source>test_link</source><parameters>[{"name": "vocab_file", "val": ""}, {"name": "normalization", "val": " = False"}, {"name": "bos_token", "val": " = \'&amp;lt;s>\'"}]</parameters><paramsdesc>- **vocab_file** (`str`) --\n Path to the vocabulary file.\n- **merges_file** (`str`) --\n Path to the merges file.\n- **normalization** (`bool`, _optional_, defaults to `False`) --\n Whether or not to apply a normalization preprocess.\n\n<Tip>\n\nWhen building a sequence using special tokens, this is not the token that is used for the beginning of\nsequence. The token used is the `cls_token`.\n\n</Tip></paramsdesc><paramgroups>0</paramgroups><rettype>`List[int]`</rettype><retdesc>List of [input IDs](../glossary.html#input-ids) with the appropriate special tokens.</retdesc><raises>- `ValuError` -- this value error will be raised on wrong input type.</raises><raisederrors>`ValuError`</raisederrors></docstring>\nConstructs a BERTweet tokenizer, using Byte-Pair-Encoding.\n\nThis tokenizer inherits from [`~transformers.PreTrainedTokenizer`] which contains most of the main methods.\nUsers should refer to this superclass for more information regarding those methods.\n\n\n\n\n\n\n\n\n'
expected_signature_component = '<docstring><name>class transformers.BertweetTokenizer</name><anchor>transformers.BertweetTokenizer</anchor><source>test_link</source><parameters>[{"name": "vocab_file", "val": ""}, {"name": "normalization", "val": " = False"}, {"name": "bos_token", "val": " = \'&amp;lt;s>\'"}]</parameters><paramsdesc>- **vocab_file** (`str`) --\n Path to the vocabulary file.\n- **merges_file** (`str`) --\n Path to the merges file.\n- **normalization** (`bool`, _optional_, defaults to `False`) --\n Whether or not to apply a normalization preprocess.\n\n<Tip>\n\nWhen building a sequence using special tokens, this is not the token that is used for the beginning of\nsequence. The token used is the `cls_token`.\n\n</Tip></paramsdesc><paramgroups>0</paramgroups><rettype>`List[int]`</rettype><retdesc>List of [input IDs](../glossary.html#input-ids) with the appropriate special tokens.</retdesc><raises>- ``ValuError`` -- this value error will be raised on wrong input type.</raises><raisederrors>``ValuError``</raisederrors></docstring>\nConstructs a BERTweet tokenizer, using Byte-Pair-Encoding.\n\nThis tokenizer inherits from [`~transformers.PreTrainedTokenizer`] which contains most of the main methods.\nUsers should refer to this superclass for more information regarding those methods.\n\n\n\n\n\n\n\n\n'
self.assertEqual(
get_signature_component(name, anchor, signature, object_doc, source_link), expected_signature_component
)
Expand Down
28 changes: 25 additions & 3 deletions tests/test_convert_rst_to_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
remove_indent,
split_arg_line,
split_pt_tf_code_blocks,
split_raise_line,
split_return_line,
)

Expand Down Expand Up @@ -473,6 +474,23 @@ def test_split_return_line(self):
self.assertEqual(split_return_line(":class:`IterableDataset`"), (":class:`IterableDataset`", ""))
self.assertEqual(split_return_line("`int`"), ("`int`", ""))

def test_split_raise_line(self):
# Checks that split_raise_line returns an (error type, description) pair for each supported spelling of a
# raise entry.
# Error type and description separated by a space only.
self.assertEqual(split_raise_line("SomeError some error"), ("SomeError", "some error"))
# A colon directly after the error type is not part of the returned type.
self.assertEqual(split_raise_line("SomeError: some error"), ("SomeError", "some error"))
# A Markdown link as the error type: colons inside the URL are kept, only the trailing one is removed.
self.assertEqual(
split_raise_line("[SomeError](https:://someurl): some error"),
("[SomeError](https:://someurl)", "some error"),
)
# A Markdown link with no trailing colon is returned unchanged as the error type.
self.assertEqual(
split_raise_line(
"[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid"
),
(
"[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError)",
"if credentials are invalid",
),
)

def test_split_arg_line(self):
self.assertEqual(split_arg_line(" x (:obj:`int`): an int"), (" x (:obj:`int`)", " an int"))
self.assertEqual(split_arg_line(" x (:obj:`int`)"), (" x (:obj:`int`)", ""))
Expand All @@ -492,6 +510,8 @@ def test_parse_rst_docsting(self):
TypeError: if the target type is not supported according, e.g.
- point1
- point2
[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if credentials are invalid
[`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) if connection got lost
Returns:
:obj:`str` or :obj:`bool`: some result
Expand All @@ -515,14 +535,16 @@ def test_parse_rst_docsting(self):
<raises>
- `pa.ArrowInvalidError` -- if the arrow data casting fails
- `TypeError` -- if the target type is not supported according, e.g.
- ``pa.ArrowInvalidError`` -- if the arrow data casting fails
- ``TypeError`` -- if the target type is not supported according, e.g.
- point1
- point2
- [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) -- if credentials are invalid
- [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError) -- if connection got lost
</raises>
<raisederrors>`pa.ArrowInvalidError` or `TypeError`</raisederrors>
<raisederrors>``pa.ArrowInvalidError`` or ``TypeError`` or ``HTTPError``</raisederrors>
<returns>
Expand Down

0 comments on commit b648e06

Please sign in to comment.