From 902fd25b532f1a5fdd71d520f8e5d51b8e6a85eb Mon Sep 17 00:00:00 2001
From: Rudi Grinberg <me@rgrinberg.com>
Date: Mon, 26 Dec 2022 09:21:17 -0600
Subject: [PATCH] fix(lsp): text edit application

Do not eat newlines when applying edits past the newline character.

Any edit where the character position goes beyond the newline char
should be truncated to right before the newline.

Signed-off-by: Rudi Grinberg <me@rgrinberg.com>

ps-id: ae5e0adc-18a7-450d-be78-589c4995e3d6
---
 CHANGES.md                      |  3 +++
 lsp/src/text_document.ml        | 46 +++++++++++++++++----------------
 lsp/test/text_document_tests.ml |  6 ++---
 3 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index ffb705152..e9afd6288 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -30,6 +30,9 @@
 
 - Fix semantic highlighting of "long identifiers," e.g., `Foo.Bar.x` (#932)
 
+- Fix syncing of document contents. Previously, whole line edits would
+  incorrectly eat the newline characters. (#971)
+
 # 1.14.2
 
 ## Fixes
diff --git a/lsp/src/text_document.ml b/lsp/src/text_document.ml
index 8cb7bab9e..e51e9eaed 100644
--- a/lsp/src/text_document.ml
+++ b/lsp/src/text_document.ml
@@ -18,14 +18,18 @@ let find_nth_nl =
     | n -> n
     | exception Outside -> len
 
+let newline = Uchar.of_char '\n'
+
 let find_utf8_pos =
-  let rec find_pos char dec =
-    if char = 0 || Uutf.decoder_line dec = 2 then Uutf.decoder_byte_count dec
+  let rec find_pos newline char dec =
+    if char = 0 then Uutf.decoder_byte_count dec
     else
       match Uutf.decode dec with
       | `Malformed _ | `Await -> raise Invalid_utf8
       | `End -> assert false
-      | `Uchar _ -> find_pos (char - 1) dec
+      | `Uchar u ->
+        if Uchar.equal u newline then Uutf.decoder_byte_count dec - 1
+        else find_pos newline (char - 1) dec
   in
   fun s ~start ~character ->
     let dec =
@@ -37,7 +41,7 @@ let find_utf8_pos =
       start
       (String.length s - start);
     assert (Uutf.decoder_line dec = 1);
-    find_pos character dec + start
+    find_pos newline character dec + start
 
 let find_offset_8 ~utf8 ~utf8_range:range =
   let { Range.start; end_ } = range in
@@ -62,35 +66,33 @@ let find_offset_8 ~utf8 ~utf8_range:range =
 
 let find_offset_16 ~utf8 ~utf16_range:range =
   let dec =
-    Uutf.decoder
-      ~nln:(`ASCII (Uchar.of_char '\n'))
-      ~encoding:`UTF_8
-      (`String utf8)
+    Uutf.decoder ~nln:(`ASCII newline) ~encoding:`UTF_8 (`String utf8)
   in
   let utf16_codepoint_size = 4 in
   let utf16_codepoints_buf = Bytes.create utf16_codepoint_size in
   let enc = Uutf.encoder `UTF_16LE `Manual in
   let rec find_char line char =
-    if char = 0 || Uutf.decoder_line dec = line + 2 then
-      Uutf.decoder_byte_count dec
+    if char = 0 then Uutf.decoder_byte_count dec
     else
       match Uutf.decode dec with
       | `Await -> raise Invalid_utf8
       | `End -> Uutf.decoder_byte_count dec
       | `Malformed _ ->
         invalid_arg "Text_document.find_offset: utf8 string is malformed"
-      | `Uchar _ as u ->
-        Uutf.Manual.dst enc utf16_codepoints_buf 0 utf16_codepoint_size;
-        (match Uutf.encode enc u with
-        | `Partial ->
-          (* we always have space for one character *)
-          assert false
-        | `Ok -> ());
-        let char =
-          let bytes_read = utf16_codepoint_size - Uutf.Manual.dst_rem enc in
-          char - (bytes_read / 2)
-        in
-        find_char line char
+      | `Uchar c as u ->
+        if Uchar.equal newline c then Uutf.decoder_byte_count dec - 1
+        else (
+          Uutf.Manual.dst enc utf16_codepoints_buf 0 utf16_codepoint_size;
+          (match Uutf.encode enc u with
+          | `Partial ->
+            (* we always have space for one character *)
+            assert false
+          | `Ok -> ());
+          let char =
+            let bytes_read = utf16_codepoint_size - Uutf.Manual.dst_rem enc in
+            char - (bytes_read / 2)
+          in
+          find_char line char)
   in
   let rec find_pos (pos : Position.t) =
     if Uutf.decoder_line dec = pos.line + 1 then
diff --git a/lsp/test/text_document_tests.ml b/lsp/test/text_document_tests.ml
index 266f36a53..0b3cae952 100644
--- a/lsp/test/text_document_tests.ml
+++ b/lsp/test/text_document_tests.ml
@@ -107,7 +107,7 @@ let%expect_test "replace first line" =
 
 let%expect_test "beyond max char" =
   let range = tuple_range (0, 0) (0, 100) in
-  test "foo\nbar\n" range ~change:"baz\n";
+  test "foo\nbar\n" range ~change:"baz";
   [%expect {|
     UTF16:
     baz\nbar\n
@@ -124,9 +124,9 @@ let%expect_test "entire line without newline" =
   test "xxx\n" (tuple_range (0, 0) (0, 4)) ~change:"baz";
   [%expect {|
     UTF16:
-    baz
+    baz\n
     UTF8:
-    baz |}];
+    baz\n |}];
   test "xxx\n" (tuple_range (0, 0) (1, 0)) ~change:"baz";
   [%expect {|
     UTF16: