Skip to content

Commit

Permalink
Improve highlighting of JavaScript
Browse files: browse the repository at this point in the history
  • Loading branch information
jart committed Oct 26, 2024
1 parent 6260c7b commit b0efa25
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 136 deletions.
23 changes: 23 additions & 0 deletions llamafile/highlight_js.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ enum {
TICK_BACKSLASH,
REGEX,
REGEX_BACKSLASH,
REGEX_SQUARE,
REGEX_SQUARE_BACKSLASH,
};

enum {
Expand Down Expand Up @@ -141,6 +143,8 @@ void HighlightJs::feed(std::string *r, std::string_view input) {
*r += c;
if (c == '\\') {
t_ = REGEX_BACKSLASH;
} else if (c == '[') {
t_ = REGEX_SQUARE;
} else {
t_ = REGEX;
}
Expand Down Expand Up @@ -227,6 +231,8 @@ void HighlightJs::feed(std::string *r, std::string_view input) {
t_ = NORMAL;
} else if (c == '\\') {
t_ = REGEX_BACKSLASH;
} else if (c == '[') {
t_ = REGEX_SQUARE;
}
break;

Expand All @@ -235,6 +241,21 @@ void HighlightJs::feed(std::string *r, std::string_view input) {
t_ = REGEX;
break;

case REGEX_SQUARE:
// State for the inside of a regex character class ("[...]"). It exists
// because /[/]/g is valid code: no branch below reacts to '/', so a slash
// seen here is copied through and leaves the state unchanged.
*r += c; // the character is appended to the output as-is
if (c == '\\') {
// Hand the following character to REGEX_SQUARE_BACKSLASH so that it is
// not examined here (e.g. an escaped ']' must not close the class).
t_ = REGEX_SQUARE_BACKSLASH;
} else if (c == ']') {
// Character class closed; go back to the ordinary REGEX state.
t_ = REGEX;
}
break;

case REGEX_SQUARE_BACKSLASH:
// State for the single character that follows a backslash inside a regex
// character class. The character is appended without being inspected, then
// scanning resumes in REGEX_SQUARE.
*r += c;
t_ = REGEX_SQUARE;
break;

default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -279,6 +300,8 @@ void HighlightJs::flush(std::string *r) {
case SLASH_STAR_STAR:
case REGEX:
case REGEX_BACKSLASH:
case REGEX_SQUARE:
case REGEX_SQUARE_BACKSLASH:
*r += HI_RESET;
break;
default:
Expand Down
254 changes: 153 additions & 101 deletions llamafile/is_keyword_js.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@

#include <string.h>

#define TOTAL_KEYWORDS 36
#define TOTAL_KEYWORDS 53
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 10
#define MAX_WORD_LENGTH 12
#define MIN_HASH_VALUE 2
#define MAX_HASH_VALUE 61
/* maximum key range = 60, duplicates = 0 */
#define MAX_HASH_VALUE 71
/* maximum key range = 70, duplicates = 0 */

#ifdef __GNUC__
__inline
Expand All @@ -52,32 +52,32 @@ hash (register const char *str, register size_t len)
{
static const unsigned char asso_values[] =
{
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 0, 15, 0,
0, 0, 10, 62, 10, 0, 62, 62, 10, 5,
0, 20, 62, 62, 30, 0, 20, 40, 15, 15,
25, 35, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
62, 62, 62, 62, 62, 62
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 10, 35, 20,
25, 25, 20, 72, 0, 0, 72, 72, 40, 20,
0, 15, 30, 72, 15, 5, 0, 35, 0, 10,
0, 45, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72, 72, 72, 72, 72,
72, 72, 72, 72, 72, 72
};
return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}
Expand All @@ -88,132 +88,184 @@ is_keyword_js (register const char *str, register size_t len)
struct stringpool_t
{
char stringpool_str2[sizeof("in")];
char stringpool_str3[sizeof("new")];
char stringpool_str4[sizeof("case")];
char stringpool_str5[sizeof("catch")];
char stringpool_str6[sizeof("delete")];
char stringpool_str7[sizeof("default")];
char stringpool_str8[sizeof("debugger")];
char stringpool_str4[sizeof("this")];
char stringpool_str5[sizeof("throw")];
char stringpool_str6[sizeof("throws")];
char stringpool_str9[sizeof("interface")];
char stringpool_str10[sizeof("instanceof")];
char stringpool_str11[sizeof("import")];
char stringpool_str12[sizeof("if")];
char stringpool_str13[sizeof("let")];
char stringpool_str14[sizeof("else")];
char stringpool_str15[sizeof("class")];
char stringpool_str17[sizeof("finally")];
char stringpool_str18[sizeof("var")];
char stringpool_str19[sizeof("with")];
char stringpool_str20[sizeof("await")];
char stringpool_str11[sizeof("static")];
char stringpool_str13[sizeof("var")];
char stringpool_str14[sizeof("with")];
char stringpool_str15[sizeof("while")];
char stringpool_str16[sizeof("native")];
char stringpool_str17[sizeof("as")];
char stringpool_str18[sizeof("try")];
char stringpool_str19[sizeof("void")];
char stringpool_str20[sizeof("async")];
char stringpool_str21[sizeof("switch")];
char stringpool_str22[sizeof("do")];
char stringpool_str25[sizeof("const")];
char stringpool_str26[sizeof("static")];
char stringpool_str28[sizeof("continue")];
char stringpool_str30[sizeof("while")];
char stringpool_str22[sizeof("if")];
char stringpool_str23[sizeof("volatile")];
char stringpool_str24[sizeof("transient")];
char stringpool_str25[sizeof("await")];
char stringpool_str26[sizeof("import")];
char stringpool_str27[sizeof("finally")];
char stringpool_str28[sizeof("new")];
char stringpool_str29[sizeof("enum")];
char stringpool_str30[sizeof("implements")];
char stringpool_str31[sizeof("export")];
char stringpool_str32[sizeof("extends")];
char stringpool_str33[sizeof("for")];
char stringpool_str34[sizeof("this")];
char stringpool_str35[sizeof("throw")];
char stringpool_str36[sizeof("return")];
char stringpool_str39[sizeof("void")];
char stringpool_str40[sizeof("yield")];
char stringpool_str34[sizeof("case")];
char stringpool_str35[sizeof("catch")];
char stringpool_str37[sizeof("of")];
char stringpool_str38[sizeof("for")];
char stringpool_str39[sizeof("from")];
char stringpool_str40[sizeof("const")];
char stringpool_str42[sizeof("do")];
char stringpool_str43[sizeof("continue")];
char stringpool_str45[sizeof("super")];
char stringpool_str50[sizeof("break")];
char stringpool_str53[sizeof("try")];
char stringpool_str58[sizeof("function")];
char stringpool_str61[sizeof("typeof")];
char stringpool_str46[sizeof("return")];
char stringpool_str47[sizeof("package")];
char stringpool_str50[sizeof("yield")];
char stringpool_str51[sizeof("typeof")];
char stringpool_str52[sizeof("private")];
char stringpool_str53[sizeof("abstract")];
char stringpool_str54[sizeof("protected")];
char stringpool_str55[sizeof("break")];
char stringpool_str56[sizeof("delete")];
char stringpool_str57[sizeof("default")];
char stringpool_str58[sizeof("debugger")];
char stringpool_str62[sizeof("synchronized")];
char stringpool_str63[sizeof("function")];
char stringpool_str65[sizeof("class")];
char stringpool_str68[sizeof("let")];
char stringpool_str69[sizeof("else")];
char stringpool_str71[sizeof("public")];
};
static const struct stringpool_t stringpool_contents =
{
"in",
"new",
"case",
"catch",
"delete",
"default",
"debugger",
"this",
"throw",
"throws",
"interface",
"instanceof",
"import",
"if",
"let",
"else",
"class",
"finally",
"static",
"var",
"with",
"await",
"switch",
"do",
"const",
"static",
"continue",
"while",
"native",
"as",
"try",
"void",
"async",
"switch",
"if",
"volatile",
"transient",
"await",
"import",
"finally",
"new",
"enum",
"implements",
"export",
"extends",
"case",
"catch",
"of",
"for",
"this",
"throw",
"from",
"const",
"do",
"continue",
"super",
"return",
"void",
"package",
"yield",
"super",
"typeof",
"private",
"abstract",
"protected",
"break",
"try",
"delete",
"default",
"debugger",
"synchronized",
"function",
"typeof"
"class",
"let",
"else",
"public"
};
#define stringpool ((const char *) &stringpool_contents)
static const int wordlist[] =
{
-1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str2,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str3,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str4,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str5,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str6,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str7,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str8,
-1,
-1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str9,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str10,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str11,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str12,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str13,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str14,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str15,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str16,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str17,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str18,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str19,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str20,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str21,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str22,
-1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str23,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str24,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str25,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str26,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str27,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str28,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str29,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str30,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str31,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str32,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str33,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str34,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str35,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str36,
-1, -1,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str37,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str38,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str39,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str40,
-1, -1, -1, -1,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str42,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str43,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str45,
-1, -1, -1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str50,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str46,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str47,
-1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str50,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str51,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str52,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str53,
-1, -1, -1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str54,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str55,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str56,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str57,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str58,
-1, -1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str62,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str63,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str65,
-1, -1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str61
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str68,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str69,
-1,
(int)(size_t)&((struct stringpool_t *)0)->stringpool_str71
};

if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
Expand Down
Loading

0 comments on commit b0efa25

Please sign in to comment.