Skip to content

Commit

Permalink
fix(links): fall back to en-us
Browse files Browse the repository at this point in the history
  • Loading branch information
fiji-flo committed Dec 10, 2024
1 parent cdf0993 commit 21a7f18
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 123 deletions.
3 changes: 3 additions & 0 deletions crates/rari-doc/src/cached_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,9 @@ pub fn contributor_spotlight_files() -> Cow<'static, UrlToPageMap> {
/// ### Example
///
/// ```
/// # use rari_doc::cached_readers::wiki_histories;
/// # use rari_types::locale::Locale;
///
/// let wiki_histories = wiki_histories();
/// if let Some(en_us_history) = wiki_histories.get(&Locale::EnUs) {
/// println!("Loaded en-US wiki history: {:?}", en_us_history);
Expand Down
5 changes: 5 additions & 0 deletions crates/rari-doc/src/contributors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ pub type WikiHistories = HashMap<Locale, WikiHistory>;
/// # Example
///
/// ```rust
/// # use rari_doc::contributors::contributors_txt;
/// # use rari_doc::contributors::WikiHistoryEntry;
///
/// let github_file_url = "https://github.com/user/repo/blob/main/file.txt";
/// let wiki_history = Some(WikiHistoryEntry {
/// contributors: vec!["Alice".to_string(), "Bob".to_string()],
Expand All @@ -56,6 +59,8 @@ pub type WikiHistories = HashMap<Locale, WikiHistory>;
/// If no `wiki_history` is provided:
///
/// ```rust
/// # use rari_doc::contributors::contributors_txt;
/// let github_file_url = "https://github.com/user/repo/blob/main/file.txt";
/// let result = contributors_txt(None, github_file_url);
/// println!("{}", result);
/// // Output:
Expand Down
184 changes: 184 additions & 0 deletions crates/rari-doc/src/html/fix_link.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
use std::borrow::Cow;

use lol_html::{html_content::Element, HandlerResult};
use rari_types::{fm_types::PageType, locale::default_locale};
use rari_utils::concat_strs;

use crate::{
helpers::l10n::l10n_json_data,
issues::get_issue_couter,
pages::page::{Page, PageLike},
redirects::resolve_redirect,
resolve::{strip_locale_from_url, url_with_locale},
};

/// Dispatches an `<a href>` element to the matching link handler.
///
/// * Hrefs starting with `/` or with `https://developer.mozilla.org` are
///   passed to [`handle_internal_link`].
/// * Any other href starting with `http:` or `https:` is passed to
///   [`handle_extenal_link`].
/// * Everything else is left untouched.
///
/// # Panics
///
/// Panics if `el` has no `href` attribute.
pub fn check_and_fix_link(
    el: &mut Element,
    page: &impl PageLike,
    data_issues: bool,
) -> HandlerResult {
    let original_href = el.get_attribute("href").expect("href was required");
    let href = original_href.as_str();

    let is_internal =
        href.starts_with('/') || href.starts_with("https://developer.mozilla.org");
    if is_internal {
        return handle_internal_link(href, el, page, data_issues);
    }

    let is_external = href.starts_with("http:") || href.starts_with("https:");
    if is_external {
        return handle_extenal_link(el);
    }

    Ok(())
}

/// Decorates a link that points outside of the site.
///
/// Appends the `external` class unless one of the space-separated class
/// tokens already equals `external`, and sets `target="_blank"` when the
/// element has no `target` attribute yet.
pub fn handle_extenal_link(el: &mut Element) -> HandlerResult {
    let current = el.get_attribute("class").unwrap_or_default();
    let already_marked = current.split(' ').any(|token| token == "external");
    if !already_marked {
        // Only insert a separating space when there are existing classes.
        let separator = if current.is_empty() { "" } else { " " };
        let updated = concat_strs!(&current, separator, "external");
        el.set_attribute("class", &updated)?;
    }

    if !el.has_attribute("target") {
        el.set_attribute("target", "_blank")?;
    }

    Ok(())
}

/// Checks an internal link and rewrites the element accordingly.
///
/// Steps, in order:
///
/// 1. The `https://developer.mozilla.org` origin is stripped from the href
///    (an empty remainder becomes `/`). For curriculum pages the stripped
///    form is also what later gets compared against the resolved href.
/// 2. If the href carries no locale and `Page::ignore_link_check` accepts
///    it (without its hash), nothing is changed.
/// 3. A locale-less href is prefixed with the locale of `page`, then
///    redirects are resolved.
/// 4. If the resolved URL (without hash) equals `page.url()`, the element
///    gets `aria-current="page"`.
/// 5. If the target page does not exist and the href was not explicitly in
///    the default locale, the default-locale variant of the URL is tried.
/// 6. If the target still does not exist, the element gets the
///    `page-not-created` class, its `href` is moved to `data-href`, and a
///    localized `title` is set. Otherwise, when the fallback was used, the
///    `only-in-en-us` class is added.
/// 7. If the resolved href differs from the original one, a
///    `redirected-link` warning is emitted (and `data-flaw` is set when
///    `data_issues` is `true`), and the `href` is updated unless it was
///    removed in step 6.
///
/// # Errors
///
/// Propagates errors from setting attributes on `el` and from the
/// localized title lookup.
pub fn handle_internal_link(
    original_href: &str,
    el: &mut Element,
    page: &impl PageLike,
    data_issues: bool,
) -> HandlerResult {
    // Strip prefix for curriculum links.
    let original_href = if page.page_type() == PageType::Curriculum {
        original_href
            .strip_prefix("https://developer.mozilla.org")
            .unwrap_or(original_href)
    } else {
        original_href
    };

    let href = original_href
        .strip_prefix("https://developer.mozilla.org")
        .map(|href| if href.is_empty() { "/" } else { href })
        .unwrap_or(original_href);
    let href_no_hash = &href[..href.find('#').unwrap_or(href.len())];
    let (href_locale, _) = strip_locale_from_url(href);
    let no_locale = href_locale.is_none();
    if no_locale && Page::ignore_link_check(href_no_hash) {
        return Ok(());
    }
    // Locale-less links are resolved relative to the current page's locale.
    let maybe_prefixed_href = if no_locale {
        Cow::Owned(concat_strs!("/", page.locale().as_url_str(), href))
    } else {
        Cow::Borrowed(href)
    };
    let mut resolved_href =
        resolve_redirect(&maybe_prefixed_href).unwrap_or(Cow::Borrowed(&maybe_prefixed_href));
    let mut resolved_href_no_hash =
        &resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
    if resolved_href_no_hash == page.url() {
        el.set_attribute("aria-current", "page")?;
    }

    // Try the default locale when the target is missing and the link was
    // not already explicitly pointing at the default locale.
    let en_us_fallback = if !Page::exists(resolved_href_no_hash)
        && !Page::ignore_link_check(href)
        && href_locale != Some(default_locale())
    {
        // Diagnostics go through `tracing` instead of stdout so regular
        // build output is not polluted.
        tracing::debug!("falling back to default locale for {resolved_href}");
        if let Some(en_us_href) = url_with_locale(&resolved_href, default_locale()) {
            resolved_href = resolve_redirect(&en_us_href).unwrap_or(Cow::Owned(en_us_href));
            resolved_href_no_hash =
                &resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
        }
        true
    } else {
        false
    };

    // Target is still missing: turn the anchor into a "page not created" stub.
    let remove_href = if !Page::exists(resolved_href_no_hash) && !Page::ignore_link_check(href) {
        tracing::debug!("{resolved_href_no_hash} {href}");
        let class = el.get_attribute("class").unwrap_or_default();
        el.set_attribute(
            "class",
            &concat_strs!(
                &class,
                if class.is_empty() { "" } else { " " },
                "page-not-created"
            ),
        )?;
        // Keep the original target around for tooling before dropping `href`.
        if let Some(href) = el.get_attribute("href") {
            el.set_attribute("data-href", &href)?;
        }
        el.remove_attribute("href");
        el.set_attribute("title", l10n_json_data("Common", "summary", page.locale())?)?;
        true
    } else {
        false
    };

    // The fallback found an existing page: flag the link as default-locale only.
    if !remove_href && en_us_fallback {
        let class = el.get_attribute("class").unwrap_or_default();
        if !class.split(' ').any(|s| s == "only-in-en-us") {
            el.set_attribute(
                "class",
                &concat_strs!(
                    &class,
                    if class.is_empty() { "" } else { " " },
                    "only-in-en-us"
                ),
            )?;
        }
    }

    // Links written without a locale are emitted without one as well.
    let resolved_href = if no_locale {
        strip_locale_from_url(&resolved_href).1
    } else {
        resolved_href.as_ref()
    };
    if original_href != resolved_href {
        if let Some(pos) = el.get_attribute("data-sourcepos") {
            // `data-sourcepos` is parsed as `<line>:<col>-<rest>`; only the
            // start position is reported.
            if let Some((start, _)) = pos.split_once('-') {
                if let Some((line, col)) = start.split_once(':') {
                    // NOTE(review): the line is shifted by `page.fm_offset()`,
                    // presumably the front-matter length; confirm.
                    let line = line
                        .parse::<i64>()
                        .map(|l| l + i64::try_from(page.fm_offset()).unwrap_or(l - 1))
                        .ok()
                        .unwrap_or(-1);
                    let col = col.parse::<i64>().ok().unwrap_or(0);
                    let ic = get_issue_couter();
                    tracing::warn!(
                        source = "redirected-link",
                        ic = ic,
                        line = line,
                        col = col,
                        url = original_href,
                        redirect = resolved_href
                    );
                    if data_issues {
                        el.set_attribute("data-flaw", &ic.to_string())?;
                    }
                }
            }
        } else {
            let ic = get_issue_couter();
            tracing::warn!(
                source = "redirected-link",
                ic = ic,
                url = original_href,
                redirect = resolved_href
            );
            if data_issues {
                el.set_attribute("data-flaw", &ic.to_string())?;
            }
        }

        // When `remove_href` is set the attribute was already removed above
        // and must not be re-added.
        if !remove_href {
            el.set_attribute("href", resolved_href)?;
        }
    }
    Ok(())
}
1 change: 1 addition & 0 deletions crates/rari-doc/src/html/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod bubble_up;
mod fix_link;
pub mod links;
pub mod modifier;
pub mod rewriter;
Expand Down
3 changes: 3 additions & 0 deletions crates/rari-doc/src/html/modifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ pub fn remove_empty_p(html: &mut Html) -> Result<(), DocError> {
/// # Example
///
/// ```rust
/// # use scraper::Html;
/// # use rari_doc::html::modifier::add_missing_ids;
///
/// let mut html = Html::parse_document("<h2>Some Heading</h2>");
/// add_missing_ids(&mut html);
/// ```
Expand Down
125 changes: 3 additions & 122 deletions crates/rari-doc/src/html/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@ use tracing::warn;
use url::Url;

use crate::error::DocError;
use crate::helpers::l10n::l10n_json_data;
use crate::html::fix_link::check_and_fix_link;
use crate::issues::get_issue_couter;
use crate::pages::page::{Page, PageLike};
use crate::pages::page::PageLike;
use crate::pages::types::curriculum::CurriculumPage;
use crate::redirects::resolve_redirect;
use crate::resolve::strip_locale_from_url;

pub fn post_process_inline_sidebar(input: &str) -> Result<String, DocError> {
let element_content_handlers = vec![element!("*[data-rewriter=em]", |el| {
Expand Down Expand Up @@ -190,124 +188,7 @@ pub fn post_process_html<T: PageLike>(
Ok(())
}),
element!("a[href]", |el| {
let original_href = el.get_attribute("href").expect("href was required");
// Strip prefix for curriculum links.
let original_href = if page.page_type() == PageType::Curriculum {
original_href
.strip_prefix("https://developer.mozilla.org")
.unwrap_or(&original_href)
} else {
&original_href
};
if original_href.starts_with('/')
|| original_href.starts_with("https://developer.mozilla.org")
{
let href = original_href
.strip_prefix("https://developer.mozilla.org")
.map(|href| if href.is_empty() { "/" } else { href })
.unwrap_or(original_href);
let href_no_hash = &href[..href.find('#').unwrap_or(href.len())];
let no_locale = strip_locale_from_url(href).0.is_none();
if no_locale && Page::ignore_link_check(href_no_hash) {
return Ok(());
}
let maybe_prefixed_href = if no_locale {
Cow::Owned(concat_strs!("/", page.locale().as_url_str(), href))
} else {
Cow::Borrowed(href)
};
let resolved_href = resolve_redirect(&maybe_prefixed_href)
.unwrap_or(Cow::Borrowed(&maybe_prefixed_href));
let resolved_href_no_hash =
&resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
if resolved_href_no_hash == page.url() {
el.set_attribute("aria-current", "page")?;
}
let remove_href = if !Page::exists(resolved_href_no_hash)
&& !Page::ignore_link_check(href)
{
tracing::debug!("{resolved_href_no_hash} {href}");
let class = el.get_attribute("class").unwrap_or_default();
el.set_attribute(
"class",
&concat_strs!(
&class,
if class.is_empty() { "" } else { " " },
"page-not-created"
),
)?;
if let Some(href) = el.get_attribute("href") {
el.set_attribute("data-href", &href)?;
}
el.remove_attribute("href");
el.set_attribute("title", l10n_json_data("Common", "summary", page.locale())?)?;
true
} else {
false
};
let resolved_href = if no_locale {
strip_locale_from_url(&resolved_href).1
} else {
resolved_href.as_ref()
};
if original_href != resolved_href {
if let Some(pos) = el.get_attribute("data-sourcepos") {
if let Some((start, _)) = pos.split_once('-') {
if let Some((line, col)) = start.split_once(':') {
let line = line
.parse::<i64>()
.map(|l| l + i64::try_from(page.fm_offset()).unwrap_or(l - 1))
.ok()
.unwrap_or(-1);
let col = col.parse::<i64>().ok().unwrap_or(0);
let ic = get_issue_couter();
tracing::warn!(
source = "redirected-link",
ic = ic,
line = line,
col = col,
url = original_href,
redirect = resolved_href
);
if data_issues {
el.set_attribute("data-flaw", &ic.to_string())?;
}
}
}
} else {
let ic = get_issue_couter();
tracing::warn!(
source = "redirected-link",
ic = ic,
url = original_href,
redirect = resolved_href
);
if data_issues {
el.set_attribute("data-flaw", &ic.to_string())?;
}
}

if !remove_href {
el.set_attribute("href", resolved_href)?;
}
}
if remove_href {
el.remove_attribute("href");
}
} else if original_href.starts_with("http:") || original_href.starts_with("https:") {
let class = el.get_attribute("class").unwrap_or_default();
if !class.split(' ').any(|s| s == "external") {
el.set_attribute(
"class",
&concat_strs!(&class, if class.is_empty() { "" } else { " " }, "external"),
)?;
}
if !el.has_attribute("target") {
el.set_attribute("target", "_blank")?;
}
}

Ok(())
check_and_fix_link(el, page, data_issues)
}),
element!("pre:not(.notranslate)", |el| {
let mut class = el.get_attribute("class").unwrap_or_default();
Expand Down
3 changes: 2 additions & 1 deletion crates/rari-doc/src/redirects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ where
/// * `Option<Cow<'_, str>>` - Returns `Some(Cow::Borrowed(target_url))` if a redirect is found and the target URL
/// does not contain a hash fragment, or `Some(Cow::Owned(format!("{target_url}{hash}")))` if the target URL
/// contains a hash fragment or the original URL has a hash fragment. Returns `None` if no redirect is found.
pub(crate) fn resolve_redirect(url: &str) -> Option<Cow<'_, str>> {
pub(crate) fn resolve_redirect<'a>(url: impl AsRef<str>) -> Option<Cow<'a, str>> {
let url = url.as_ref();
let hash_index = url.find('#').unwrap_or(url.len());
let (url_no_hash, hash) = (&url[..hash_index], &url[hash_index..]);
match (
Expand Down
Loading

0 comments on commit 21a7f18

Please sign in to comment.