From 4ffee3c1aa6bdbba0734f6a981fe378b8b436eb5 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 15 Nov 2022 18:32:23 -0500 Subject: [PATCH] feature: allow utf8 encoding Signed-off-by: Rudi Grinberg ps-id: ac0de96a-2c5f-4c90-b1af-40acdc1a8344 --- CHANGES.md | 2 + lsp/src/text_document.ml | 124 ++++++++++++++++++----- lsp/src/text_document.mli | 3 +- lsp/test/text_document_tests.ml | 82 +++++++++++---- ocaml-lsp-server/src/document.ml | 5 +- ocaml-lsp-server/src/document.mli | 1 + ocaml-lsp-server/src/import.ml | 1 + ocaml-lsp-server/src/inference.ml | 2 + ocaml-lsp-server/src/ocaml_lsp_server.ml | 33 +++++- ocaml-lsp-server/src/state.ml | 10 +- ocaml-lsp-server/src/state.mli | 11 +- 11 files changed, 216 insertions(+), 58 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 74a65efe4..5954b924c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,8 @@ - Add custom ocamllsp/hoverExtended request (#561) +- Support utf-8 position encoding clients (#919) + # 1.14.2 ## Fixes diff --git a/lsp/src/text_document.ml b/lsp/src/text_document.ml index d47114594..3c29d3fa6 100644 --- a/lsp/src/text_document.ml +++ b/lsp/src/text_document.ml @@ -3,7 +3,64 @@ module String = StringLabels exception Invalid_utf8 -let find_offset ~utf8 ~utf16_range:range = +exception Outside + +let find_nth_nl = + let rec find_nth_nl str nth pos len = + if nth = 0 then pos + else if pos >= len then raise Outside + else if str.[pos] = '\n' then find_nth_nl str (nth - 1) (pos + 1) len + else find_nth_nl str nth (pos + 1) len + in + fun s ~nth ~start -> + let len = String.length s in + match find_nth_nl s nth start len with + | n -> n + | exception Outside -> len + +let find_utf8_pos = + let rec find_pos char dec = + if char = 0 || Uutf.decoder_line dec = 2 then Uutf.decoder_byte_count dec + else + match Uutf.decode dec with + | `Malformed _ | `Await -> raise Invalid_utf8 + | `End -> assert false + | `Uchar _ -> find_pos (char - 1) dec + in + fun s ~start ~character -> + let dec = + Uutf.decoder ~nln:(`ASCII (Uchar.of_char '\n')) ~encoding:`UTF_8 `Manual + in + Uutf.Manual.src + dec + (Bytes.unsafe_of_string s) + start + (String.length s - start); + assert (Uutf.decoder_line dec = 1); + find_pos character dec + start + +let find_offset_8 ~utf8 ~utf8_range:range = + let { Range.start; end_ } = range in + let start_line_offset = find_nth_nl utf8 ~nth:start.line ~start:0 in + let end_line_offset = + if end_.line = start.line then start_line_offset + else if end_.line > start.line then + find_nth_nl utf8 ~nth:(end_.line - start.line) ~start:start_line_offset + else invalid_arg "inverted range" + in + let make_offset ~start ~character = + if start = String.length utf8 then start + else find_utf8_pos utf8 ~start ~character + in + let start_offset = + make_offset ~start:start_line_offset ~character:start.character + in + let end_offset = + make_offset ~start:end_line_offset ~character:end_.character + in + (start_offset, end_offset) + +let find_offset_16 ~utf8 ~utf16_range:range = let dec = Uutf.decoder ~nln:(`ASCII (Uchar.of_char '\n')) @@ -57,39 +114,50 @@ let find_offset ~utf8 ~utf16_range:range = computed based on UTF-16. Therefore we reencode every file into utf16 for analysis. *) -type t = TextDocumentItem.t +type t = + { document : TextDocumentItem.t + ; position_encoding : [ `UTF8 | `UTF16 ] + } -let text (t : TextDocumentItem.t) = t.text +let text (t : t) = t.document.text -let make (t : DidOpenTextDocumentParams.t) = t.textDocument +let make ~position_encoding (t : DidOpenTextDocumentParams.t) = + { document = t.textDocument; position_encoding } -let documentUri (t : TextDocumentItem.t) = t.uri +let documentUri (t : t) = t.document.uri -let version (t : TextDocumentItem.t) = t.version +let version (t : t) = t.document.version -let languageId (t : TextDocumentItem.t) = t.languageId +let languageId (t : t) = t.document.languageId -let apply_content_change ?version (t : TextDocumentItem.t) +let apply_content_change ?version (t : t) (change : TextDocumentContentChangeEvent.t) = - (* Changes can only be applied using utf16 offsets *) - let version = + let document = + match change.range with + | None -> { t.document with text = change.text } + | Some range -> + let start_offset, end_offset = + let utf8 = t.document.text in + match t.position_encoding with + | `UTF16 -> find_offset_16 ~utf8 ~utf16_range:range + | `UTF8 -> find_offset_8 ~utf8 ~utf8_range:range + in + let text = + String.concat + ~sep:"" + [ String.sub t.document.text ~pos:0 ~len:start_offset + ; change.text + ; String.sub + t.document.text + ~pos:end_offset + ~len:(String.length t.document.text - end_offset) + ] + in + { t.document with text } + in + let document = match version with - | None -> t.version + 1 - | Some version -> version + | None -> document + | Some version -> { document with version } in - match change.range with - | None -> { t with version; text = change.text } - | Some utf16_range -> - let start_offset, end_offset = find_offset ~utf8:t.text ~utf16_range in - let text = - String.concat - ~sep:"" - [ String.sub t.text ~pos:0 ~len:start_offset - ; change.text - ; String.sub - t.text - ~pos:end_offset - ~len:(String.length t.text - end_offset) - ] - in - { t with text; version } + { t with document } diff --git a/lsp/src/text_document.mli b/lsp/src/text_document.mli index f572df7f4..909384c0b 100644 --- a/lsp/src/text_document.mli +++ b/lsp/src/text_document.mli @@ -2,7 +2,8 @@ open Types type t -val make : DidOpenTextDocumentParams.t -> t +val make : + position_encoding:[ `UTF8 | `UTF16 ] -> DidOpenTextDocumentParams.t -> t val languageId : t -> string diff --git a/lsp/test/text_document_tests.ml b/lsp/test/text_document_tests.ml index 52ddd5608..996d7ef2b 100644 --- a/lsp/test/text_document_tests.ml +++ b/lsp/test/text_document_tests.ml @@ -12,60 +12,100 @@ let tuple_range start end_ = } let test text range ~change = - let td = - let uri = DocumentUri.of_path "" in - let version = 1 in - let languageId = "fake language" in - let textDocument = { TextDocumentItem.uri; version; languageId; text } in - Text_document.make { DidOpenTextDocumentParams.textDocument } + let test position_encoding = + let td = + let uri = DocumentUri.of_path "" in + let version = 1 in + let languageId = "fake language" in + let textDocument = { TextDocumentItem.uri; version; languageId; text } in + Text_document.make + ~position_encoding + { DidOpenTextDocumentParams.textDocument } + in + let td = + Text_document.apply_content_change + td + (TextDocumentContentChangeEvent.create ?range ~text:change ()) + in + (match position_encoding with + | `UTF8 -> print_endline "UTF8:" + | `UTF16 -> print_endline "UTF16:"); + print_endline (String.escaped (Text_document.text td)) in - let td = - Text_document.apply_content_change - td - (TextDocumentContentChangeEvent.create ?range ~text:change ()) - in - print_endline (String.escaped (Text_document.text td)) + test `UTF16; + test `UTF8 let%expect_test "first line insert" = let range = tuple_range (0, 1) (0, 3) in test "foo bar baz" range ~change:"XXXX"; - [%expect {| fXXXX bar baz |}] + [%expect {| + UTF16: + fXXXX bar baz + UTF8: + fXXXX bar baz |}] let%expect_test "no range" = let range = None in test "foo bar baz" range ~change:"XXXX"; - [%expect {| XXXX |}] + [%expect {| + UTF16: + XXXX + UTF8: + XXXX |}] let%expect_test "replace second line" = let range = tuple_range (1, 0) (2, 0) in test "foo\n\bar\nbaz\n" range ~change:"XXXX\n"; [%expect {| + UTF16: + foo\nXXXX\nbaz\n + UTF8: foo\nXXXX\nbaz\n |}] let%expect_test "edit in second line" = let range = tuple_range (1, 1) (1, 2) in - test "foo\n\bar\nbaz\n" range ~change:"-XXX-"; - [%expect {| - foo\n\b-XXX-r\nbaz\n |}] + test "foo\nbar\nbaz\n" range ~change:"-XXX-"; + [%expect + {| + UTF16: + foo\nb-XXX-r\nbaz\n + UTF8: + foo\nb-XXX-r\nbaz\n |}] let%expect_test "insert at the end" = let range = tuple_range (3, 1) (4, 0) in test "foo\n\bar\nbaz\n" range ~change:"XXX"; - [%expect {| + [%expect + {| + UTF16: + foo\n\bar\nbaz\nXXX + UTF8: foo\n\bar\nbaz\nXXX |}] let%expect_test "insert at the beginning" = let range = tuple_range (0, 0) (0, 0) in test "foo\n\bar\nbaz\n" range ~change:"XXX\n"; - [%expect {| + [%expect + {| + UTF16: + XXX\nfoo\n\bar\nbaz\n + UTF8: XXX\nfoo\n\bar\nbaz\n |}] let%expect_test "replace first line" = let range = tuple_range (0, 0) (1, 0) in test "foo\nbar\n" range ~change:"baz\n"; - [%expect {| baz\nbar\n |}] + [%expect {| + UTF16: + baz\nbar\n + UTF8: + baz\nbar\n |}] let%expect_test "beyond max char" = let range = tuple_range (0, 0) (0, 100) in test "foo\nbar\n" range ~change:"baz\n"; - [%expect {| baz\n |}] + [%expect {| + UTF16: + baz\n + UTF8: + baz\nbar\n |}] diff --git a/ocaml-lsp-server/src/document.ml b/ocaml-lsp-server/src/document.ml index 70d311b72..fd45fdb7f 100644 --- a/ocaml-lsp-server/src/document.ml +++ b/ocaml-lsp-server/src/document.ml @@ -216,9 +216,10 @@ let make_merlin wheel merlin_db pipeline tdoc syntax = in Merlin { merlin_config; tdoc; pipeline; timer; syntax } -let make wheel config pipeline (doc : DidOpenTextDocumentParams.t) = +let make wheel config pipeline (doc : DidOpenTextDocumentParams.t) + ~position_encoding = Fiber.of_thunk (fun () -> - let tdoc = Text_document.make doc in + let tdoc = Text_document.make ~position_encoding doc in let syntax = Syntax.of_text_document tdoc in match syntax with | Ocaml | Reason -> make_merlin wheel config pipeline tdoc syntax diff --git a/ocaml-lsp-server/src/document.mli b/ocaml-lsp-server/src/document.mli index 09159dcdb..4e793f265 100644 --- a/ocaml-lsp-server/src/document.mli +++ b/ocaml-lsp-server/src/document.mli @@ -35,6 +35,7 @@ val make : -> Merlin_config.DB.t -> Single_pipeline.t -> DidOpenTextDocumentParams.t + -> position_encoding:[ `UTF8 | `UTF16 ] -> t Fiber.t val uri : t -> Uri.t diff --git a/ocaml-lsp-server/src/import.ml b/ocaml-lsp-server/src/import.ml index 14caa8cc9..4bdf76522 100644 --- a/ocaml-lsp-server/src/import.ml +++ b/ocaml-lsp-server/src/import.ml @@ -233,6 +233,7 @@ include struct module OptionalVersionedTextDocumentIdentifier = OptionalVersionedTextDocumentIdentifier module ParameterInformation = ParameterInformation + module PositionEncodingKind = PositionEncodingKind module ProgressParams = ProgressParams module ProgressToken = ProgressToken module PublishDiagnosticsParams = PublishDiagnosticsParams diff --git a/ocaml-lsp-server/src/inference.ml b/ocaml-lsp-server/src/inference.ml index a85852c16..837197d28 100644 --- a/ocaml-lsp-server/src/inference.ml +++ b/ocaml-lsp-server/src/inference.ml @@ -53,7 +53,9 @@ let open_document_from_file (state : State.t) uri = DidOpenTextDocumentParams.create ~textDocument:text_document in let+ doc = + let position_encoding = State.position_encoding state in Document.make + ~position_encoding (State.wheel state) state.merlin_config state.merlin diff --git a/ocaml-lsp-server/src/ocaml_lsp_server.ml b/ocaml-lsp-server/src/ocaml_lsp_server.ml index dbd0c4f09..d9719f0da 100644 --- a/ocaml-lsp-server/src/ocaml_lsp_server.ml +++ b/ocaml-lsp-server/src/ocaml_lsp_server.ml @@ -117,6 +117,15 @@ let initialize_info (client_capabilities : ClientCapabilities.t) : ~full ())) in + let positionEncoding = + let open Option.O in + let* general = client_capabilities.general in + let* options = general.positionEncodings in + List.find_map + ([ UTF8; UTF16 ] : PositionEncodingKind.t list) + ~f:(fun encoding -> + Option.some_if (List.mem options ~equal:Poly.equal encoding) encoding) + in ServerCapabilities.create ~textDocumentSync ~hoverProvider:(`Bool true) @@ -139,6 +148,7 @@ let initialize_info (client_capabilities : ClientCapabilities.t) : ~renameProvider ~workspace ~executeCommandProvider + ?positionEncoding () in let serverInfo = @@ -236,7 +246,16 @@ let on_initialize server (ip : InitializeParams.t) = let+ () = Fiber.Pool.task state.detached ~f:(fun () -> Dune.run dune) in dune in - let state = State.initialize state ip workspaces dune diagnostics in + let initialize_info = initialize_info ip.capabilities in + let state = + let position_encoding = + match initialize_info.capabilities.positionEncoding with + | None | Some UTF16 -> `UTF16 + | Some UTF8 -> `UTF8 + | Some UTF32 | Some (Other _) -> assert false + in + State.initialize state ~position_encoding ip workspaces dune diagnostics + in let state = match ip.trace with | None -> state @@ -254,7 +273,7 @@ let on_initialize server (ip : InitializeParams.t) = ; _ } -> Reply.later (fun send -> - let* () = send (initialize_info ip.capabilities) in + let* () = send initialize_info in let register = RegistrationParams.create ~registrations: @@ -279,7 +298,7 @@ let on_initialize server (ip : InitializeParams.t) = Server.request server (Server_request.ClientRegisterCapability register)) - | _ -> Reply.now (initialize_info ip.capabilities) + | _ -> Reply.now initialize_info in (resp, state) @@ -911,7 +930,13 @@ let on_notification server (notification : Client_notification.t) : match notification with | TextDocumentDidOpen params -> let* doc = - Document.make (State.wheel state) state.merlin_config state.merlin params + let position_encoding = State.position_encoding state in + Document.make + ~position_encoding + (State.wheel state) + state.merlin_config + state.merlin + params in assert (Document_store.get_opt store params.textDocument.uri = None); let* () = Document_store.open_document store doc in diff --git a/ocaml-lsp-server/src/state.ml b/ocaml-lsp-server/src/state.ml index 943f6f3f8..c6a9bd253 100644 --- a/ocaml-lsp-server/src/state.ml +++ b/ocaml-lsp-server/src/state.ml @@ -8,6 +8,7 @@ type init = ; dune : Dune.t ; exp_client_caps : Client.Experimental_capabilities.t ; diagnostics : Diagnostics.t + ; position_encoding : [ `UTF16 | `UTF8 ] } type hover_extended = { mutable history : (Uri.t * Position.t * int) option } @@ -66,12 +67,18 @@ let dune t = | Uninitialized -> assert false | Initialized init -> init.dune +let position_encoding t = + match t.init with + | Uninitialized -> assert false + | Initialized init -> init.position_encoding + let diagnostics t = match t.init with | Uninitialized -> assert false | Initialized init -> init.diagnostics -let initialize t (params : InitializeParams.t) workspaces dune diagnostics = +let initialize t ~position_encoding (params : InitializeParams.t) workspaces + dune diagnostics = assert (t.init = Uninitialized); { t with init = @@ -80,6 +87,7 @@ let initialize t (params : InitializeParams.t) workspaces dune diagnostics = ; workspaces ; dune ; diagnostics + ; position_encoding ; exp_client_caps = Client.Experimental_capabilities.of_opt_json params.capabilities.experimental diff --git a/ocaml-lsp-server/src/state.mli b/ocaml-lsp-server/src/state.mli index 5f47088f3..49ef2af71 100644 --- a/ocaml-lsp-server/src/state.mli +++ b/ocaml-lsp-server/src/state.mli @@ -8,6 +8,7 @@ type init = ; dune : Dune.t ; exp_client_caps : Client.Experimental_capabilities.t ; diagnostics : Diagnostics.t + ; position_encoding : [ `UTF16 | `UTF8 ] } (** State specific to the hoverExtended request. *) @@ -42,12 +43,20 @@ val create : -> wheel:Lev_fiber.Timer.Wheel.t -> t +val position_encoding : t -> [ `UTF16 | `UTF8 ] + val wheel : t -> Lev_fiber.Timer.Wheel.t val initialize_params : t -> InitializeParams.t val initialize : - t -> InitializeParams.t -> Workspaces.t -> Dune.t -> Diagnostics.t -> t + t + -> position_encoding:[ `UTF16 | `UTF8 ] + -> InitializeParams.t + -> Workspaces.t + -> Dune.t + -> Diagnostics.t + -> t val workspace_root : t -> Uri.t