Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamically load grammar libraries at runtime #432

Merged
merged 6 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ target
helix-term/rustfmt.toml
helix-syntax/languages/
result
runtime/grammars
25 changes: 12 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions helix-core/src/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,14 +253,14 @@ where

let doc = Rope::from(doc);
use crate::syntax::{
Configuration, IndentationConfiguration, Lang, LanguageConfiguration, Loader,
Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
};
use once_cell::sync::OnceCell;
let loader = Loader::new(Configuration {
language: vec![LanguageConfiguration {
scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()],
language_id: Lang::Rust,
language_id: "Rust".to_string(),
highlight_config: OnceCell::new(),
//
roots: vec![],
Expand Down
12 changes: 6 additions & 6 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
Rope, RopeSlice, Tendril,
};

pub use helix_syntax::{get_language, get_language_name, Lang};
pub use helix_syntax::get_language;

use arc_swap::ArcSwap;

Expand All @@ -31,7 +31,7 @@ pub struct Configuration {
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfiguration {
#[serde(rename = "name")]
pub(crate) language_id: Lang,
pub(crate) language_id: String,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
Expand Down Expand Up @@ -153,7 +153,7 @@ fn read_query(language: &str, filename: &str) -> String {

impl LanguageConfiguration {
fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
let language = get_language_name(self.language_id).to_ascii_lowercase();
let language = self.language_id.to_ascii_lowercase();

let highlights_query = read_query(&language, "highlights.scm");
// always highlight syntax errors
Expand All @@ -166,7 +166,7 @@ impl LanguageConfiguration {
if highlights_query.is_empty() {
None
} else {
let language = get_language(self.language_id);
let language = get_language(&crate::RUNTIME_DIR, &self.language_id).ok()?;
let config = HighlightConfiguration::new(
language,
&highlights_query,
Expand Down Expand Up @@ -198,7 +198,7 @@ impl LanguageConfiguration {
pub fn indent_query(&self) -> Option<&IndentQuery> {
self.indent_query
.get_or_init(|| {
let language = get_language_name(self.language_id).to_ascii_lowercase();
let language = self.language_id.to_ascii_lowercase();

let toml = load_runtime_file(&language, "indents.toml").ok()?;
toml::from_slice(toml.as_bytes()).ok()
Expand Down Expand Up @@ -1802,7 +1802,7 @@ mod test {
.map(String::from)
.collect();

let language = get_language(Lang::Rust);
let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap();
let config = HighlightConfiguration::new(
language,
&std::fs::read_to_string(
Expand Down
6 changes: 4 additions & 2 deletions helix-syntax/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ include = ["src/**/*", "languages/**/*", "build.rs", "!**/docs/**/*", "!**/test/

[dependencies]
tree-sitter = "0.19"
serde = { version = "1.0", features = ["derive"] }
libloading = "0.7"
anyhow = "1"

[build-dependencies]
cc = { version = "1", features = ["parallel"] }
cc = { version = "1" }
threadpool = { version = "1.0" }
anyhow = "1"
199 changes: 133 additions & 66 deletions helix-syntax/build.rs
Original file line number Diff line number Diff line change
@@ -1,79 +1,147 @@
use anyhow::{anyhow, Context, Result};
use std::fs;
use std::path::PathBuf;
use std::time::SystemTime;
use std::{
path::{Path, PathBuf},
process::Command,
};

use std::sync::mpsc::channel;

fn collect_tree_sitter_dirs(ignore: &[String]) -> Vec<String> {
fn collect_tree_sitter_dirs(ignore: &[String]) -> Result<Vec<String>> {
let mut dirs = Vec::new();
for entry in fs::read_dir("languages").unwrap().flatten() {
let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("languages");

for entry in fs::read_dir(path)? {
let entry = entry?;
let path = entry.path();
let dir = path.file_name().unwrap().to_str().unwrap().to_string();
if !ignore.contains(&dir) {
dirs.push(dir);

if !entry.file_type()?.is_dir() {
continue;
}
}
dirs
}

fn collect_src_files(dir: &str) -> (Vec<String>, Vec<String>) {
eprintln!("Collect files for {}", dir);
let dir = path.file_name().unwrap().to_str().unwrap().to_string();

let mut c_files = Vec::new();
let mut cpp_files = Vec::new();
let path = PathBuf::from("languages").join(&dir).join("src");
for entry in fs::read_dir(path).unwrap().flatten() {
let path = entry.path();
if path
.file_stem()
.unwrap()
.to_str()
.unwrap()
.starts_with("binding")
{
// filter ignores
if ignore.contains(&dir) {
continue;
}
if let Some(ext) = path.extension() {
if ext == "c" {
c_files.push(path.to_str().unwrap().to_string());
} else if ext == "cc" || ext == "cpp" || ext == "cxx" {
cpp_files.push(path.to_str().unwrap().to_string());
}
}
dirs.push(dir)
}
(c_files, cpp_files)
}

fn build_c(files: Vec<String>, language: &str) {
let mut build = cc::Build::new();
for file in files {
build
.file(&file)
.include(PathBuf::from(file).parent().unwrap())
.pic(true)
.warnings(false);
}
build.compile(&format!("tree-sitter-{}-c", language));
Ok(dirs)
}

fn build_cpp(files: Vec<String>, language: &str) {
let mut build = cc::Build::new();
#[cfg(unix)]
const DYLIB_EXTENSION: &str = "so";

#[cfg(windows)]
const DYLIB_EXTENSION: &str = "dll";

let flag = if build.get_compiler().is_like_msvc() {
"/std:c++17"
fn build_library(src_path: &Path, language: &str) -> Result<()> {
let header_path = src_path;
// let grammar_path = src_path.join("grammar.json");
let parser_path = src_path.join("parser.c");
let mut scanner_path = src_path.join("scanner.c");

let scanner_path = if scanner_path.exists() {
Some(scanner_path)
} else {
"-std=c++14"
scanner_path.set_extension("cc");
if scanner_path.exists() {
Some(scanner_path)
} else {
None
}
};
let parser_lib_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../runtime/grammars");
let mut library_path = parser_lib_path.join(language);
library_path.set_extension(DYLIB_EXTENSION);

for file in files {
build
.file(&file)
.include(PathBuf::from(file).parent().unwrap())
.pic(true)
.warnings(false)
.cpp(true)
.flag_if_supported(flag);
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
.with_context(|| "Failed to compare source and binary timestamps")?;

if !recompile {
return Ok(());
}
let mut config = cc::Build::new();
config.cpp(true).opt_level(2).cargo_metadata(false);
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
command.current_dir(src_path);
for (key, value) in compiler.env() {
command.env(key, value);
}
build.compile(&format!("tree-sitter-{}-cpp", language));

if cfg!(windows) {
command
.args(&["/nologo", "/LD", "/I"])
.arg(header_path)
.arg("/Od");
if let Some(scanner_path) = scanner_path.as_ref() {
command.arg(scanner_path);
}

command
.arg(parser_path)
.arg("/link")
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(&library_path)
.arg("-O2");
if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
} else {
command.arg(scanner_path);
}
}
command.arg("-xc").arg(parser_path);
}

let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
if !output.status.success() {
return Err(anyhow!(
"Parser compilation failed.\nStdout: {}\nStderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
));
}

Ok(())
}
/// Decides whether the compiled library at `lib_path` has to be rebuilt.
///
/// Returns `Ok(true)` when the library file does not exist, or when
/// `parser_c_path` or (if one is given) `scanner_path` has a modification
/// time strictly newer than the library's. Returns `Ok(false)` otherwise.
///
/// # Errors
///
/// Propagates any error returned by `mtime` while reading the timestamps of
/// the library or of the source files.
fn needs_recompile(
    lib_path: &Path,
    parser_c_path: &Path,
    scanner_path: &Option<PathBuf>,
) -> Result<bool> {
    // Nothing has been built yet, so there is no timestamp to compare against.
    if !lib_path.exists() {
        return Ok(true);
    }

    let built_at = mtime(lib_path)?;

    let parser_is_newer = mtime(parser_c_path)? > built_at;
    if parser_is_newer {
        return Ok(true);
    }

    // The scanner is optional; without one the parser check above decides.
    match scanner_path {
        Some(scanner) => Ok(mtime(scanner)? > built_at),
        None => Ok(false),
    }
}

/// Returns the last-modification time recorded for the file at `path`.
///
/// # Errors
///
/// Returns an error if `fs::metadata` fails for `path` or if the
/// modification time cannot be obtained from the resulting metadata.
fn mtime(path: &Path) -> Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}

fn build_dir(dir: &str, language: &str) {
Expand All @@ -92,22 +160,21 @@ fn build_dir(dir: &str, language: &str) {
eprintln!("You can fix in using 'git submodule init && git submodule update --recursive'.");
std::process::exit(1);
}
let (c, cpp) = collect_src_files(dir);
if !c.is_empty() {
build_c(c, language);
}
if !cpp.is_empty() {
build_cpp(cpp, language);
}

let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("languages")
.join(dir)
.join("src");

build_library(&path, language).unwrap();
}

fn main() {
let ignore = vec![
"tree-sitter-typescript".to_string(),
"tree-sitter-haskell".to_string(), // aarch64 failures: https://github.com/tree-sitter/tree-sitter-haskell/issues/34
".DS_Store".to_string(),
];
let dirs = collect_tree_sitter_dirs(&ignore);
let dirs = collect_tree_sitter_dirs(&ignore).unwrap();

let mut n_jobs = 0;
let pool = threadpool::Builder::new().build(); // by going through the builder, it'll use num_cpus
Expand All @@ -118,7 +185,7 @@ fn main() {
n_jobs += 1;

pool.execute(move || {
let language = &dir[12..]; // skip tree-sitter- prefix
let language = &dir.strip_prefix("tree-sitter-").unwrap();
build_dir(&dir, language);

// report progress
Expand Down
Loading