Skip to content

Commit

Permalink
fix: only allow first h1 to be the title (#9474)
Browse files Browse the repository at this point in the history
  • Loading branch information
yufeih authored Nov 25, 2023
1 parent 0a3bd2c commit 2c5d145
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 102 deletions.
62 changes: 47 additions & 15 deletions src/Docfx.Build/Conceptual/BuildConceptualDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@

using System.Collections.Immutable;
using System.Composition;

using System.Net;
using Docfx.Build.Common;
using Docfx.Common;
using Docfx.DataContracts.Common;
using Docfx.Plugins;
using HtmlAgilityPack;

namespace Docfx.Build.ConceptualDocuments;

[Export(nameof(ConceptualDocumentProcessor), typeof(IDocumentBuildStep))]
class BuildConceptualDocument : BaseDocumentBuildStep
{
private const string ConceptualKey = Constants.PropertyName.Conceptual;
private const string DocumentTypeKey = "documentType";

public override string Name => nameof(BuildConceptualDocument);
Expand All @@ -28,16 +28,16 @@ public override void Build(FileModel model, IHostService host)
return;
}
var content = (Dictionary<string, object>)model.Content;
var markdown = (string)content[ConceptualKey];
var markdown = (string)content[Constants.PropertyName.Conceptual];
var result = host.Markup(markdown, model.OriginalFileAndType, false);

var htmlInfo = HtmlDocumentUtility.SeparateHtml(result.Html);
content["rawTitle"] = htmlInfo.RawTitle;
if (!string.IsNullOrEmpty(htmlInfo.RawTitle))
var (h1, h1Raw, conceptual) = ExtractH1(result.Html);
content["rawTitle"] = h1Raw;
if (!string.IsNullOrEmpty(h1Raw))
{
model.ManifestProperties.rawTitle = htmlInfo.RawTitle;
model.ManifestProperties.rawTitle = h1Raw;
}
content[ConceptualKey] = htmlInfo.Content;
content[Constants.PropertyName.Conceptual] = conceptual;

if (result.YamlHeader?.Count > 0)
{
Expand All @@ -47,13 +47,15 @@ public override void Build(FileModel model, IHostService host)
}
}

(content[Constants.PropertyName.Title], model.Properties.IsUserDefinedTitle) = GetTitle(result.YamlHeader, htmlInfo);
content[Constants.PropertyName.Title] = GetTitle(result.YamlHeader, h1);
content["wordCount"] = WordCounter.CountWord(conceptual);

model.LinkToFiles = result.LinkToFiles.ToImmutableHashSet();
model.LinkToUids = result.LinkToUids;
model.FileLinkSources = result.FileLinkSources;
model.UidLinkSources = result.UidLinkSources;
model.Properties.XrefSpec = null;

if (model.Uids.Length > 0)
{
var title = content[Constants.PropertyName.Title] as string;
Expand Down Expand Up @@ -108,31 +110,31 @@ void HandleYamlHeaderPair(string key, object value)
}
}

(string title, bool isUserDefined) GetTitle(ImmutableDictionary<string, object> yamlHeader, SeparatedHtmlInfo info)
string GetTitle(ImmutableDictionary<string, object> yamlHeader, string h1)
{
// title from YAML header
if (yamlHeader != null
&& TryGetStringValue(yamlHeader, Constants.PropertyName.Title, out var yamlHeaderTitle))
{
return (yamlHeaderTitle, true);
return yamlHeaderTitle;
}

// title from metadata/titleOverwriteH1
if (TryGetStringValue(content, Constants.PropertyName.TitleOverwriteH1, out var titleOverwriteH1))
{
return (titleOverwriteH1, true);
return titleOverwriteH1;
}

// title from H1
if (!string.IsNullOrEmpty(info.Title))
if (!string.IsNullOrEmpty(h1))
{
return (info.Title, false);
return h1;
}

// title from globalMetadata or fileMetadata
if (TryGetStringValue(content, Constants.PropertyName.Title, out var title))
{
return (title, true);
return title;
}

return default;
Expand All @@ -152,4 +154,34 @@ bool TryGetStringValue(IDictionary<string, object> dictionary, string key, out s
}
}
}

/// <summary>
/// Splits rendered HTML into the decoded text of its first <c>h1</c>, the raw
/// markup of that heading, and the remaining document markup.
/// </summary>
/// <param name="contentHtml">HTML produced by the markup step; must not be null.</param>
/// <returns>
/// <c>h1</c>: decoded inner text of the first <c>h1</c> matched by <c>//h1</c>, or null when none exists.
/// <c>h1Raw</c>: outer HTML of that heading when it is the leading node of the document, otherwise an empty string.
/// <c>body</c>: outer HTML of the document, with the heading removed only in the leading-node case.
/// </returns>
static (string h1, string h1Raw, string body) ExtractH1(string contentHtml)
{
    ArgumentNullException.ThrowIfNull(contentHtml);

    var html = new HtmlDocument();
    html.LoadHtml(contentHtml);
    var root = html.DocumentNode;

    var heading = root.SelectSingleNode("//h1");

    // HtmlAgilityPack returns InnerText without decoding entities, so decode it here.
    var title = WebUtility.HtmlDecode(heading?.InnerText);
    var headingMarkup = "";

    // Only strip the heading from the body when nothing but comments or
    // whitespace precedes it at the top level of the document.
    if (heading != null && FirstMeaningfulChild(root) == heading)
    {
        headingMarkup = heading.OuterHtml;
        heading.Remove();
    }

    return (title, headingMarkup, root.OuterHtml);

    // Returns the first child that is neither a comment nor whitespace-only, or null if there is none.
    static HtmlNode FirstMeaningfulChild(HtmlNode parent)
    {
        for (var child = parent.FirstChild; child != null; child = child.NextSibling)
        {
            if (child.NodeType != HtmlNodeType.Comment && !string.IsNullOrWhiteSpace(child.OuterHtml))
            {
                return child;
            }
        }

        return null;
    }
}
}
49 changes: 0 additions & 49 deletions src/Docfx.Build/Conceptual/HtmlDocumentUtility.cs

This file was deleted.

13 changes: 0 additions & 13 deletions src/Docfx.Build/Conceptual/SeparatedHtmlInfo.cs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,35 +1,10 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Immutable;
using System.Composition;
using Docfx.Build.Common;
using Docfx.DataContracts.Common;
using Docfx.Plugins;
using HtmlAgilityPack;

namespace Docfx.Build.ConceptualDocuments;

/// <summary>
/// Build step that stores a <c>wordCount</c> entry on every article model
/// during post-build, computed from the model's conceptual content.
/// </summary>
[Export(nameof(ConceptualDocumentProcessor), typeof(IDocumentBuildStep))]
class CountWord : BaseDocumentBuildStep
{
    public override string Name => nameof(CountWord);

    public override int BuildOrder => 1;

    /// <summary>
    /// Writes <c>content["wordCount"]</c> for each model whose type is <see cref="DocumentType.Article"/>.
    /// </summary>
    public override void Postbuild(ImmutableList<FileModel> models, IHostService host)
    {
        foreach (var item in models)
        {
            // Only articles carry conceptual content to count.
            if (item.Type != DocumentType.Article)
            {
                continue;
            }

            var properties = (Dictionary<string, object>)item.Content;
            var conceptualHtml = (string)properties[Constants.PropertyName.Conceptual];
            properties["wordCount"] = WordCounter.CountWord(conceptualHtml);
        }
    }
}

internal static class WordCounter
{
private static readonly string[] ExcludeNodeXPaths = { "//title" };
Expand Down

0 comments on commit 2c5d145

Please sign in to comment.