Skip to content

Commit

Permalink
feat(tree-sitter): support file to language association through globs
Browse files Browse the repository at this point in the history
* new field (file-globs) in the config that allows a language to be matched by globs (an alternative to file-types)
* added tree-sitter ssh-client-config for the ~/.ssh/config file
* added the .git/config file to tree-sitter git-config
  • Loading branch information
midnightexigent committed May 10, 2022
1 parent 40647f0 commit 29ab179
Show file tree
Hide file tree
Showing 6 changed files with 378 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions book/src/generated/lang-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
| scala || || `metals` |
| solidity || | | `solc` |
| sql || | | |
| sshclientconfig || | | |
| svelte || || `svelteserver` |
| swift || | | `sourcekit-lsp` |
| tablegen |||| |
Expand Down
2 changes: 2 additions & 0 deletions helix-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,7 @@ chrono = { version = "0.4", default-features = false, features = ["alloc", "std"
etcetera = "0.3"
textwrap = "0.15.0"

globset = "0.4"

[dev-dependencies]
quickcheck = { version = "1", default-features = false }
46 changes: 38 additions & 8 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{
};

use arc_swap::{ArcSwap, Guard};
use globset::{Glob, GlobSet, GlobSetBuilder};
use slotmap::{DefaultKey as LayerId, HopSlotMap};

use std::{
Expand All @@ -25,6 +26,16 @@ use serde::{Deserialize, Serialize};

use helix_loader::grammar::{get_language, load_runtime_file};

fn deserialize_glob_set<'de, D: serde::Deserializer<'de>>(
deserializer: D,
) -> Result<GlobSet, D::Error> {
let globs = Vec::<String>::deserialize(deserializer)?;
let mut builder = GlobSetBuilder::new();
for glob in globs {
builder.add(Glob::new(&glob).map_err(serde::de::Error::custom)?);
}
builder.build().map_err(serde::de::Error::custom)
}
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
where
D: serde::Deserializer<'de>,
Expand Down Expand Up @@ -61,11 +72,14 @@ pub struct Configuration {
pub struct LanguageConfiguration {
#[serde(rename = "name")]
pub language_id: String, // c-sharp, rust
pub scope: String, // source.rust
pub scope: String, // source.rust
#[serde(default)]
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
#[serde(default, skip_serializing, deserialize_with = "deserialize_glob_set")]
pub file_globs: GlobSet,
#[serde(default)]
pub shebangs: Vec<String>, // interpreter(s) associated with language
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub comment_token: Option<String>,
pub max_line_length: Option<usize>,

Expand Down Expand Up @@ -436,6 +450,7 @@ pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
language_config_ids_by_globset: Vec<(GlobSet, usize)>,
language_config_ids_by_shebang: HashMap<String, usize>,

scopes: ArcSwap<Vec<String>>,
Expand All @@ -447,6 +462,7 @@ impl Loader {
language_configs: Vec::new(),
language_config_ids_by_file_type: HashMap::new(),
language_config_ids_by_shebang: HashMap::new(),
language_config_ids_by_globset: Vec::new(),
scopes: ArcSwap::from_pointee(Vec::new()),
};

Expand All @@ -465,6 +481,9 @@ impl Loader {
.language_config_ids_by_shebang
.insert(shebang.clone(), language_id);
}
loader
.language_config_ids_by_globset
.push((config.file_globs.clone(), language_id));

loader.language_configs.push(Arc::new(config));
}
Expand All @@ -478,16 +497,27 @@ impl Loader {
let configuration_id = path
.file_name()
.and_then(|n| n.to_str())
.and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
.and_then(|file_name| {
self.language_config_ids_by_file_type
.get(file_name)
.copied()
})
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| self.language_config_ids_by_file_type.get(extension))
});

configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
.and_then(|extension| {
self.language_config_ids_by_file_type
.get(extension)
.copied()
})
})
.or_else(|| {
self.language_config_ids_by_globset
.iter()
.find_map(|(gs, id)| if gs.is_match(path) { Some(*id) } else { None })
})?;

// TODO: content_regex handling conflict resolution
self.language_configs.get(configuration_id).cloned()
}

pub fn language_config_for_shebang(&self, source: &Rope) -> Option<Arc<LanguageConfiguration>> {
Expand Down
13 changes: 12 additions & 1 deletion languages.toml
Original file line number Diff line number Diff line change
Expand Up @@ -946,8 +946,8 @@ source = { git = "https://github.com/tree-sitter/tree-sitter-regex", rev = "e1cf
name = "git-config"
scope = "source.gitconfig"
roots = []
# TODO: allow specifying file-types as a regex so we can read directory names (e.g. `.git/config`)
file-types = [".gitmodules", ".gitconfig"]
file-globs = ["*.git/config"]
injection-regex = "git-config"
comment-token = "#"
indent = { tab-width = 4, unit = "\t" }
Expand Down Expand Up @@ -1342,3 +1342,14 @@ indent = { tab-width = 4, unit = " " }
[[grammar]]
name = "odin"
source = { git = "https://github.com/MineBill/tree-sitter-odin", rev = "da885f4a387f169b9b69fe0968259ee257a8f69a" }

# OpenSSH client configuration (e.g. `~/.ssh/config`).
[[language]]
name = "sshclientconfig"
scope = "source.sshclientconfig"
# Associated by path glob rather than by `file-types`: the pattern includes the
# parent directory (`.ssh/`), not just the file name `config`.
file-globs = ["*.ssh/config"]
roots = []

# Tree-sitter grammar for the `sshclientconfig` language, pinned to a fixed revision.
[[grammar]]
name = "sshclientconfig"
source = { git = "https://github.com/metio/tree-sitter-ssh-client-config", rev = "769d7a01a2e5493b4bb5a51096c6bf4be130b024" }

Loading

0 comments on commit 29ab179

Please sign in to comment.