Skip to content

Commit

Permalink
Rollup merge of rust-lang#101731 - notriddle:notriddle/more-improved-…
Browse files Browse the repository at this point in the history
…html-check, r=GuillaumeGomez

rustdoc: improve rustdoc HTML suggestions handling of nested generics

Based on some poor suggestions produced when stablizing this lint and running it on `manformed-generics.rs` in rust-lang#101720
  • Loading branch information
GuillaumeGomez authored Sep 12, 2022
2 parents bbe23e8 + 84ca399 commit 05a267f
Show file tree
Hide file tree
Showing 6 changed files with 330 additions and 13 deletions.
83 changes: 80 additions & 3 deletions src/librustdoc/passes/html_tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
if current_pos == end_pos { None } else { Some(current_pos) }
}

fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
use rustc_lexer::{is_id_continue, is_id_start};
let mut current_pos = start_pos;
loop {
if current_pos < text.len() && text[current_pos..].starts_with("::") {
current_pos += 2;
} else {
break;
}
let mut chars = text[current_pos..].chars();
if let Some(c) = chars.next() {
if is_id_start(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
while let Some(c) = chars.next() {
if is_id_continue(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
}
if current_pos == start_pos { None } else { Some(current_pos) }
}

fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
// https://spec.commonmark.org/0.30/#raw-html
//
Expand Down Expand Up @@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
// If a tag looks like `<this>`, it might actually be a generic.
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
if let Some(Some(generics_start)) = (is_open_tag
&& dox[..range.end].ends_with('>'))
let mut generics_end = range.end;
if let Some(Some(mut generics_start)) = (is_open_tag
&& dox[..generics_end].ends_with('>'))
.then(|| extract_path_backwards(&dox, range.start))
{
while generics_start != 0
&& generics_end < dox.len()
&& dox.as_bytes()[generics_start - 1] == b'<'
&& dox.as_bytes()[generics_end] == b'>'
{
generics_end += 1;
generics_start -= 1;
if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
generics_start = new_start;
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
let generics_sp = match super::source_span_for_markdown_range(
tcx,
&dox,
&(generics_start..range.end),
&(generics_start..generics_end),
&item.attrs,
) {
Some(sp) => sp,
None => item.attr_span(tcx),
};
// Sometimes, we only extract part of a path. For example, consider this:
//
// <[u32] as IntoIter<u32>>::Item
// ^^^^^ unclosed HTML tag `u32`
//
// We don't have any code for parsing fully-qualified trait paths.
// In theory, we could add it, but doing it correctly would require
// parsing the entire path grammar, which is problematic because of
// overlap between the path grammar and Markdown.
//
// The example above shows that ambiguity. Is `[u32]` intended to be an
// intra-doc link to the u32 primitive, or is it intended to be a slice?
//
// If the below conditional were removed, we would suggest this, which is
// not what the user probably wants.
//
// <[u32] as `IntoIter<u32>`>::Item
//
// We know that the user actually wants to wrap the whole thing in a code
// block, but the only reason we know that is because `u32` does not, in
// fact, implement IntoIter. If the example looks like this:
//
// <[Vec<i32>] as IntoIter<i32>::Item
//
// The ideal fix would be significantly different.
if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
|| (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
{
diag.emit();
return;
}
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
diag.multipart_suggestion(
"try marking as source code",
Expand Down
42 changes: 42 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,48 @@ pub struct ConstGeneric;
// HTML tags cannot contain commas, so no error.
pub struct MultipleGenerics;

/// This <[u32] as Iterator<Item>> thing!
//~^ERROR unclosed HTML tag `Item`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
//
// The important part is that we don't produce any *wrong* suggestions.
// While several other examples below are added to make sure we don't
// produce suggestions when given complex paths, this example is the actual
// reason behind not just using the real path parser. It's ambiguous: there's
// no way to locally reason out whether that `[u32]` is intended to be a slice
// or an intra-doc link.
pub struct FullyQualifiedPathsDoNotCount;

/// This <Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount1;

/// This Vec<Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount2;

/// This Vec<Vec as IntoIter> thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount3;

/// This Vec<Vec<i32> as IntoIter> thing!
//~^ERROR unclosed HTML tag `i32`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount4;

/// This Vec<i32 class="test"> thing!
//~^ERROR unclosed HTML tag `i32`
// HTML attributes shouldn't be treated as Rust syntax, so no suggestions.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,68 @@
error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:11:13
error: unclosed HTML tag `Item`
--> $DIR/html-as-generics-no-suggestions.rs:11:28
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^
LL | /// This <[u32] as Iterator<Item>> thing!
| ^^^^^^
|
note: the lint level is defined here
--> $DIR/html-as-generics-no-suggestions.rs:1:9
|
LL | #![deny(rustdoc::invalid_html_tags)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:25:10
|
LL | /// This <Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:32:13
|
LL | /// This Vec<Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:39:13
|
LL | /// This Vec<Vec as IntoIter> thing!
| ^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:46:17
|
LL | /// This Vec<Vec<i32> as IntoIter> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:53:13
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^

error: unopened HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:20:13
--> $DIR/html-as-generics-no-suggestions.rs:62:13
|
LL | /// This Vec</i32> thing!
| ^^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:25:13
--> $DIR/html-as-generics-no-suggestions.rs:67:13
|
LL | /// This 123<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:30:14
--> $DIR/html-as-generics-no-suggestions.rs:72:14
|
LL | /// This Vec:<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:35:39
--> $DIR/html-as-generics-no-suggestions.rs:77:39
|
LL | /// This [link](https://rust-lang.org)<i32> thing!
| ^^^^^

error: aborting due to 5 previous errors
error: aborting due to 10 previous errors

40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.fixed
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics `Vec<Vec<u32>>`
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path `Vec<i32>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path `<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path `Vec<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump `<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump `Vec<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct `<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct `Vec<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics Vec<Vec<u32>>
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path Vec<i32>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path <Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path Vec<Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump <Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump Vec<Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct <Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct Vec<Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
Loading

0 comments on commit 05a267f

Please sign in to comment.