Skip to content

Commit

Permalink
Improve markdown and syntax highlighting in server (#88)
Browse files Browse the repository at this point in the history
Fixes #68
  • Loading branch information
phoboslab authored Dec 13, 2023
1 parent 2849b08 commit c236a71
Showing 1 changed file with 260 additions and 8 deletions.
268 changes: 260 additions & 8 deletions llama.cpp/server/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,27 @@
}

/* Code rendered inside <pre>: shown as a dark, horizontally scrollable block */
pre code {
/* NOTE(review): background-color is declared again further down in this rule;
   the later declaration (#222) is the one that takes effect */
background-color: #22211f;
padding: 0.3em 0.3em;
display: block;
background-color: #222;
color: #ddd;
/* long lines scroll sideways instead of widening the page */
overflow-x: auto;
/* render tab characters four columns wide */
tab-size: 4;
}

/* Base style shared by every <code> element */
code {
font-family: monospace;
padding: 0.1em 0.3em;
border-radius: 3px;
}
/* Token colours for <span class="sh-*"> elements inside <code>; the suffix
   after "sh-" is a one-letter token type */
code span.sh-c { color: #999; } /* comment */
code span.sh-s { color: #ae2; } /* string */
code span.sh-r { color: #b8f; } /* regexp */
code span.sh-p { color: #5df; } /* punctuation */
code span.sh-k { color: #f07; } /* keyword */
code span.sh-n { color: #fa4; } /* number */
code span.sh-d { color: #a8f; } /* definition */

fieldset label {
margin: 0.5em 0;
Expand Down Expand Up @@ -828,20 +839,261 @@
});
}

// poor mans markdown replacement
// Syntax Highlighting, condensed down from https://github.com/speed-highlight/core
// The following regexps capture some generic highlightable syntax, such as
// "strings" or /* comments */. These regexps are then utilized by the
// language definitions below, which assign each regexp to a `type`. The type
// maps to a sh-* css class.

// Generic token patterns shared by the language tables. Every pattern carries
// the `g` flag so a scanner can set `lastIndex` and search onwards from an
// arbitrary offset with exec(). Because of that flag the regexps are stateful:
// reset `lastIndex` before calling exec()/test() on them directly.
const SH_REGEXP = {
	// Single or double quoted string; the closing quote is optional so that
	// an unterminated string is still matched
	STRING: /("|')(\\[^]|(?!\1)[^\r\n\\])*\1?/g,
	// Backtick quoted string, may span multiple lines
	STRING_BACKTICK: /`((?!`)[^]|\\[^])*`?/g,
	// `//` line comment or `/* */` block comment (closing marker optional)
	COMMENT: /\/\/.*\n?|\/\*((?!\*\/)[^])*(\*\/)?/g,
	COMMENT_HASH: /#.*\n?/g,
	// Triple quoted python string, used as a comment/docstring
	COMMENT_PYTHON: /("""|''')(\\[^]|(?!\1)[^])*\1?/g,
	// Preprocessor directive, including backslash line continuations
	PREPROC: /#\s*\w+(\\\n|[^\n])*\n?/g,
	COMMENT_DASH: /--.*\n?/g,
	COMMENT_XML: /<!--((?!-->)[^])*-->/g,
	KW: /\b(set|get|as|break|case|const|continue|default|delete|do|else|export|for|from|function|goto|if|import|extern|in|let|var|null|of|package|return|static|switch|typeof|void|while)\b/g,
	CLASS: /\b(class|constructor|extends|implements|interface|new|private|protected|public|super|this|abstract|final|virtual|instanceof)\b/g,
	// Fixed: this previously listed the misspelling "finnaly", so the real
	// `finally` keyword was never matched
	EXCEPTION: /\b(try|throw|throws|catch|finally)\b/g,
	BOOL: /\b(true|false)\b/g,
	BOOL_PYTHON: /\b(True|False)\b/g,
	KW_ASYNC: /\b(async|await|yield)\b/g,
	KW_JS: /\b(with|NaN|debugger|undefined)\b/g,
	KW_RS: /\b(crate|fn|impl|loop|match|mod|move|mut|pub|ref|self|Self|trait|type|unsafe|use|where|dyn|become|box|macro|override|priv|unsized)\b/g,
	KW_LUA: /\b(and|elseif|end|local|nil|not|or|repeat|then|until)\b/g,
	KW_PYTHON: /\b(and|as|assert|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b/g,
	TYPES: /\b(char|double|enum|float|int|signed|unsigned|struct|union)\b/g,
	TYPES_SQL: /\b(varchar|(tiny|medium|long|big)(text|blob|int)|integer|decimal|boolean)\b/g,
	// Regexp literal with optional flags
	REGEX: /\/((?!\/)[^\r\n\\]|\\.)+\/[dgimsuy]*/g,
	// Numbers: decimal, hex/octal style prefixes, exponents, `_` separators
	NUM: /(\.e?|\b)\d(e-|[\d.oxa-fA-F_])*(\.|\b)/g,
	// ALL_CAPS identifiers
	DEF: /\b([A-Z][A-Z_]*)\b/g,
	// Runs of operator/punctuation characters
	OP: /(&|<|>|[/*+:?|%^~=!,.^-])+/g,
	// Opening tag start, closing tag start, or tag end
	XML_ELEM: /<\w+|<\/\w+|>/g
};

// Highlighting rules per language. Each language maps to an ordered list of
// {type, match} rules: `match` is one of the SH_REGEXP patterns and `type` is
// the one-letter suffix of the sh-* css class used for that token. When two
// rules match at the same position, the one listed first wins.
const SH_LANGS = (() => {
	const R = SH_REGEXP;

	// Expand compact [type, regexp] pairs into {type, match} rule objects
	const rules = (...pairs) => pairs.map(([type, match]) => ({type, match}));

	// html and xml use the same rules, but each gets its own array
	const markupRules = () => rules(
		['s', R.STRING],
		['c', R.COMMENT_XML],
		['k', R.XML_ELEM]
	);

	const langs = {
		generic: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['k', R.BOOL],
			['k', R.KW],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		py: rules(
			['c', R.COMMENT_PYTHON],
			['c', R.COMMENT_HASH],
			['s', R.STRING],
			['k', R.BOOL_PYTHON],
			['k', R.KW_PYTHON],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		js: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['s', R.STRING_BACKTICK],
			['k', R.KW],
			['k', R.EXCEPTION],
			['k', R.BOOL],
			['k', R.CLASS],
			['k', R.KW_ASYNC],
			['k', R.KW_JS],
			['r', R.REGEX],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		json: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['n', R.NUM],
			['k', R.BOOL]
		),
		bash: rules(
			['s', R.STRING],
			['c', R.COMMENT_HASH],
			['p', R.OP]
		),
		c: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['k', R.KW],
			['k', R.BOOL],
			['k', R.TYPES],
			['n', R.NUM],
			['n', R.PREPROC],
			['n', R.DEF],
			['p', R.OP]
		),
		cpp: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['k', R.KW],
			['k', R.EXCEPTION],
			['k', R.BOOL],
			['k', R.CLASS],
			['k', R.TYPES],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		rs: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['k', R.KW],
			['r', R.REGEX],
			['k', R.EXCEPTION],
			['k', R.BOOL],
			['k', R.CLASS],
			['k', R.TYPES],
			['k', R.KW_ASYNC],
			['k', R.KW_RS],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		lua: rules(
			['c', R.COMMENT_DASH],
			['c', R.COMMENT_HASH],
			['s', R.STRING],
			['k', R.BOOL],
			['k', R.KW],
			['k', R.KW_LUA],
			['n', R.NUM],
			['n', R.DEF],
			['p', R.OP]
		),
		java: rules(
			['c', R.COMMENT],
			['s', R.STRING],
			['n', R.NUM],
			['n', R.DEF],
			['k', R.KW],
			['k', R.EXCEPTION],
			['k', R.BOOL],
			['k', R.CLASS],
			['k', R.TYPES]
		),
		sql: rules(
			['c', R.COMMENT_DASH],
			['s', R.STRING],
			['s', R.STRING_BACKTICK],
			['r', R.TYPES],
			['r', R.TYPES_SQL],
			['n', R.NUM],
			['k', R.DEF],
			['p', R.OP]
		),
		html: markupRules(),
		xml: markupRules()
	};

	// Long-form names are aliases for the short ones (same array instance)
	langs.python = langs.py;
	langs.rust = langs.rs;
	langs.javascript = langs.js;

	return langs;
})();

/**
 * Wrap the tokens recognised in `src` in `<span class="sh-TYPE">` elements.
 *
 * `src` is copied into the result verbatim (no escaping happens here), so
 * callers that insert the result as HTML must escape `src` beforehand.
 *
 * @param {string} src - Source text to highlight.
 * @param {string} lang - Language name; looked up case-insensitively in
 *   SH_LANGS. Unknown names fall back to the `generic` rules.
 * @returns {string} `src` with matched tokens wrapped in span elements.
 */
const Syntaxhighlightish = (src, lang) => {
	let html = '';
	const cache = [];

	// Only own keys of SH_LANGS name a language. A plain SH_LANGS[lang] also
	// resolves inherited members such as "constructor" or "toString", which
	// are not rule arrays and made the .slice() below throw.
	const key = typeof lang === 'string' ? lang.toLowerCase() : '';
	const known = Object.prototype.hasOwnProperty.call(SH_LANGS, key);

	// Work on a copy: rules that can no longer match are removed as we go
	const rules = (known ? SH_LANGS[key] : SH_LANGS.generic).slice();

	let i = 0;
	while (i < src.length) {
		let fm = null; // earliest match at or after i
		let ft = '';   // token type of that match

		// Walk backwards so that splicing does not disturb the indices still
		// to be visited, and so that ties (<=) go to the rule listed first.
		for (let r = rules.length; r-- > 0;) {
			const token = rules[r];

			// Re-run the regexp only when there is no cached match for this
			// rule or the cached one starts before the current position
			if (cache[r] === undefined || cache[r].index < i) {
				token.match.lastIndex = i;
				const match = token.match.exec(src);
				if (match === null) {
					// No match in the rest of src: drop the rule, keeping
					// the cache aligned with the rules array
					rules.splice(r, 1);
					cache.splice(r, 1);
					continue;
				}
				cache[r] = match;
			}
			// Empty matches are ignored; they would not advance i
			if (cache[r][0] && (fm === null || cache[r].index <= fm.index)) {
				fm = cache[r];
				ft = token.type;
			}
		}
		if (fm === null) {
			break;
		}
		// Copy the unmatched text in front of the token, then the token
		if (i !== fm.index) {
			html += src.substring(i, fm.index);
		}
		i = fm.index + fm[0].length;
		html += '<span class="sh-'+ft+'">'+fm[0]+'</span>';
	}
	// Whatever is left after the last token
	html += src.substring(i, src.length);
	return html;
};

// This transforms _some_ markdown to html by replacing code blocks and
// urls with a placeholder, so that any markdown within these already
// matched blocks won't be processed again.

/**
 * Render a small subset of markdown as HTML.
 *
 * The text is HTML-escaped first. Code blocks, inline code and urls are then
 * replaced with `<blockN>` placeholders so that the emphasis/headline rules
 * cannot touch their contents; the placeholders are swapped back at the end.
 * Literal `<` in the input is already escaped at that point, so the input
 * cannot forge a placeholder.
 *
 * @param {{text: string}} params - `params.text` is the markdown source.
 * @returns The result of the `html` template: a span whose inner HTML is the
 *   rendered markup.
 */
const Markdownish = (params) => {
	const blocks = [];
	const md = params.text
		.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')

		// Multiline code - be liberal with the closing ``` here: we just assume
		// it will be closed eventually. This makes the code formatting and
		// highlighting work while we still receive more tokens.
		.replace(/```(\w*)\n([\s\S]*?)(```|$)/g, (m, lang, code) => {
			const id = '<block'+blocks.length+'>';
			const block = lang.length ? Syntaxhighlightish(code, lang) : code;
			// lang only ever contains \w characters, so it is safe in the attribute
			blocks.push('<pre><code class="shl-'+lang+'">'+block+'</code></pre>');
			return id;
		})

		// Inline code
		.replace(/`(.*?)`/g, (m, code) => {
			const id = '<block'+blocks.length+'>';
			blocks.push('<code class="inline">'+code+'</code>');
			return id;
		})

		// Urls. These are often wrapped in <> angle brackets
		.replace(/(\b|&lt;)((https?:\/\/(?:www\.)?|www\.)([^\s]+\([^\s]+[^!,.:\s]|[^(\s]+[^)!,.:\s]))/ig, (m, pre, url, httpwww, hostandpath) => {
			const id = '<block'+blocks.length+'>';
			if (httpwww === 'www.') {
				url = 'http://' + url;
			}
			if (pre === '&lt;') {
				// Drop the (escaped) closing angle bracket from the match
				hostandpath = hostandpath.replace(/&gt;$/g, '');
				url = url.replace(/&gt;$/g, '');
			}
			// The url ends up inside a double quoted attribute; a raw " would
			// terminate it and let the text inject further attributes.
			const href = url.replace(/"/g, '&quot;');
			blocks.push('<a href="'+href+'">'+hostandpath+'</a>');
			return id;
		})

		// Headlines, emphasis and line breaks
		.replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
		.replace(/(__|\*\*)(.*?)\1/g, '<strong>$2</strong>')
		.replace(/(_|\*)(.*?)\1/g, '<em>$2</em>')
		.replace(/\n/gim, '<br />')

		// Paste the extracted blocks back in again
		.replace(/<block(\d+)>/g, (m, index) => blocks[index]);

	// NOTE(review): md is injected as raw HTML; its safety relies entirely on
	// the escaping done above.
	return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
};

Expand Down

0 comments on commit c236a71

Please sign in to comment.