Skip to content

Commit

Permalink
Merge pull request #186 from mrodz/185-bug-crash-when-importing-a-mod…
Browse files Browse the repository at this point in the history
…ule-that-contains-a-string

fix edge cases with character escaping
  • Loading branch information
mrodz authored Feb 19, 2024
2 parents 81056ef + 9ef8d57 commit ed9ce3d
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 51 deletions.
2 changes: 1 addition & 1 deletion bytecode/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ impl MScriptFile {
// instruction (w. Args) syntax: `{id} arg1 arg2 "multi arg" arg4\0`
// where {id}: u8
[instruction, b' ', args @ .., 0x00] if in_function => {
let args = split_string(String::from_utf8_lossy(args))?;
let args = split_string(String::from_utf8_lossy(args).as_ref())?;

let instruction = Instruction::new(*instruction, args);

Expand Down
33 changes: 25 additions & 8 deletions bytecode/src/instruction.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! All of the implementations for each bytecode instruction are found here.
//! All the implementations for each bytecode instruction are found here.
use super::context::Ctx;
use super::function::InstructionExitState;
Expand Down Expand Up @@ -505,7 +505,7 @@ pub mod implementations {
let raw_str = match args.len() {
0 => "",
1 => &args[0],
_ => bail!("make_str requires 0 arguments (empty string) or one argument (the string)"),
_ => bail!("make_str requires 0 arguments (empty string) or one argument (the string), but got {args:?}"),
};

let var = crate::string!(raw raw_str);
Expand Down Expand Up @@ -765,7 +765,7 @@ pub mod implementations {
}
ctx.pop_frame();

return Ok(())
return Ok(());
}
_ => bail!("missing argument, and the last item in the local stack ({last:?}, S:{:#?}) is not a function.", ctx.get_local_operating_stack()),
}
Expand Down Expand Up @@ -1483,17 +1483,27 @@ pub mod implementations {
/// println!("{combined:?}"); // Ok(["Hello World"])
///
/// ```
pub fn split_string(string: Cow<str>) -> Result<Box<[String]>> {
pub fn split_string(string: &str) -> Result<Box<[String]>> {
split_string_v2(string, true)
}

/// Choose whether the output is one string, or if spaces/quotes
/// are delimiters for many outputs.
pub fn split_string_v2(string: &str, multi_target: bool) -> Result<Box<[String]>> {
let mut result = Vec::<String>::new();
let mut buf = String::new();
let mut in_quotes = false;
let mut escaping = false;

for char in string.chars() {
if !in_quotes && (char.is_whitespace()) {
if !buf.is_empty() {
result.push(buf.to_string());
buf.clear();
if multi_target {
if !buf.is_empty() {
result.push(buf.to_string());
buf.clear();
}
} else {
buf.push(char);
}
continue;
}
Expand All @@ -1514,7 +1524,7 @@ pub fn split_string(string: Cow<str>) -> Result<Box<[String]>> {
continue;
}

if in_quotes {
if multi_target && in_quotes {
result.push(buf.to_string());
buf.clear();
}
Expand Down Expand Up @@ -1551,8 +1561,15 @@ pub fn split_string(string: Cow<str>) -> Result<Box<[String]>> {
bail!("found EOL while parsing string: `{string}`")
} else {
if !buf.is_empty() {
// flush buffer at the end of processing
result.push(buf.to_string());
}

if result.is_empty() {
// edge case
result.push(String::new());
}

Ok(result.into_boxed_slice())
}
}
Expand Down
2 changes: 1 addition & 1 deletion bytecode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub mod compilation_bridge {
use crate::instruction_constants::{BIN_TO_REPR, REPR_TO_BIN};
use std::borrow::Cow;

pub use crate::instruction::split_string;
pub use crate::instruction::{split_string, split_string_v2};

pub use crate::file::{MScriptFile, MScriptFileBuilder};
pub use crate::instruction::Instruction;
Expand Down
2 changes: 1 addition & 1 deletion bytecode/src/variables/ops/mul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl std::ops::Mul for &Primitive {
}

/// we cannot use built-in repeat, since Primitive does not implement Copy.
fn repeat_vec(original: &Vec<Primitive>, new_size: usize) -> Result<Vec<Primitive>> {
fn repeat_vec(original: &[Primitive], new_size: usize) -> Result<Vec<Primitive>> {
let len = original.len();
let Some(new_size) = len.checked_mul(new_size) else {
bail!("new size is too large")
Expand Down
3 changes: 1 addition & 2 deletions bytecode_dev_transpiler/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use anyhow::{bail, Result};
use bytecode::compilation_bridge::*;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use std::borrow::Cow;
use std::fs::File;
use std::io::{BufRead, BufReader, Seek, Write};
use std::path::Path;
Expand Down Expand Up @@ -154,7 +153,7 @@ pub fn transpile_file(path: &str, new_path: &str) -> Result<()> {
pb.tick();
}

let arguments = split_string(Cow::Borrowed(args))?;
let arguments = split_string(args)?;

let name = name.trim_start();

Expand Down
58 changes: 30 additions & 28 deletions compiler/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,25 +243,26 @@ impl CompiledItem {
}

pub fn repr(&self, use_string_version: bool) -> Result<String> {
fn fix_arg_if_needed(arg: &String) -> Result<Cow<String>> {
let starts = arg.starts_with('\"');
let ends = arg.ends_with('\"');

if starts ^ ends {
bail!("non-matching `\"` on arg {arg}")
}

let has_quotes = starts && ends;

if !has_quotes && arg.contains(' ') {
let mut combined = String::with_capacity(arg.len() + 2);
combined.push('"');
combined.push_str(arg);
combined.push('"');
Ok(Cow::Owned(combined))
} else {
Ok(Cow::Borrowed(arg))
}
fn fix_arg_if_needed(arg: &str) -> Result<Cow<str>> {
Ok(Cow::Owned("\"".to_owned() + arg + "\""))
// let starts = arg.starts_with('"');
// let ends = arg.ends_with('"') && !arg.ends_with(r#"\""#);
//
// if starts ^ ends {
// bail!("non-matching `\"` on arg {arg} ({starts} & {ends})")
// }
//
// let has_quotes = starts && ends;
//
// if !has_quotes && arg.contains(' ') {
// let mut combined = String::with_capacity(arg.len() + 2);
// combined.push('"');
// combined.push_str(arg);
// combined.push('"');
// Ok(Cow::Owned(combined))
// } else {
// Ok(Cow::Borrowed(arg))
// }
}

match self {
Expand Down Expand Up @@ -291,8 +292,9 @@ impl CompiledItem {
if arguments.len() >= 1 {
for arg in &arguments[..] {
args.push(' ');
let arg = fix_arg_if_needed(arg).unwrap();
args.push_str(arg.as_str());
let replaced = arg.replace('"', "\\\"");
let arg = fix_arg_if_needed(&replaced)?;
args.push_str(arg.as_ref());
}
}

Expand Down Expand Up @@ -384,7 +386,7 @@ impl CompilationState {
&self,
file_manager: &FileManager,
driver: &mut impl FnMut(&File, Vec<CompiledItem>) -> Result<()>,
) -> Result<(), Vec<anyhow::Error>> {
) -> Result<(), Vec<Error>> {
{
let completed = file_manager.completed_ast.borrow();
log::debug!(
Expand All @@ -401,7 +403,7 @@ impl CompilationState {
file_manager: &FileManager,
driver: &mut impl FnMut(&File, Vec<CompiledItem>) -> Result<()>,
depth: usize,
) -> Result<(), Vec<anyhow::Error>> {
) -> Result<(), Vec<Error>> {
let mut view = self.compilation_queue.borrow_mut();

let mut node @ Some(..) = view.take() else {
Expand Down Expand Up @@ -452,7 +454,7 @@ impl CompilationState {

pub fn free_loop_register(&self, register: NumberLoopRegister) {
if let NumberLoopRegister::Generated(id) = register {
assert!(self.loop_register_c.get() == id);
assert_eq!(self.loop_register_c.get(), id);

self.loop_register_c.set(id - 1);
}
Expand Down Expand Up @@ -503,7 +505,7 @@ impl CompilationState {
}
}

pub(crate) trait Compile<E = anyhow::Error> {
pub(crate) trait Compile<E = Error> {
fn compile(&self, state: &CompilationState) -> Result<Vec<CompiledItem>, E>;
}

Expand Down Expand Up @@ -580,10 +582,10 @@ impl<'a> From<&'a Ident> for Dependency<'a> {
///
/// The algorithm for "satisfying" dependencies is as follows:
/// * If dependency.name != supplied.name, continue
/// * If dependency is a variable from a child scope, succeed if supplied.type == dependency.type
/// * If dependency is a variable from a child scope, succeed if `supplied.type == dependency.type`
/// * If dependency is not a variable from a child scope, it will be propagated regardless of whether the types
/// match (required for cases where a variable from a parent scope is copied to a separate local variable with the same name).
/// * Succeed if supplied.type == dependency.type
/// * Succeed if `supplied.type == dependency.type`
pub(crate) fn get_net_dependencies(ast_item: &dyn Dependencies, is_scope: bool) -> Vec<Dependency> {
let supplies = ast_item.supplies();
let dependencies = ast_item.dependencies();
Expand Down Expand Up @@ -637,7 +639,7 @@ pub(crate) trait Dependencies {

/// This function is used to calculate the outstanding dependencies by filtering out identities
/// _needed_ from the identities _supplied_ by an AST node. If implementing an AST node that
/// **DIRECTLY** creates a scope (For example, a block of code), this method should be overriden.
/// **DIRECTLY** creates a scope (For example, a block of code), this method should be overridden.
///
/// [`get_net_dependencies(self, false)`](get_net_dependencies) is the default implementation.
/// Override this function and pass `true` if dealing with an AST node that creates a scope.
Expand Down
9 changes: 5 additions & 4 deletions compiler/src/ast/string.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::borrow::Cow;

use anyhow::Result;
use bytecode::compilation_bridge::split_string_v2;

use crate::{
instruction,
Expand Down Expand Up @@ -29,7 +28,7 @@ impl Dependencies for AstString {
}

impl IntoType for AstString {
fn for_type(&self) -> Result<super::TypeLayout> {
fn for_type(&self) -> Result<TypeLayout> {
match self {
AstString::Plain(str) => Ok(TypeLayout::Native(NativeType::Str(StrWrapper(Some(
str.len(),
Expand All @@ -51,7 +50,9 @@ impl Compile for AstString {
impl Parser {
pub fn string(input: Node) -> Result<AstString> {
let as_str = input.as_str();
let encoded = bytecode::compilation_bridge::split_string(Cow::Borrowed(as_str))?[0].clone();
let bridge_parts = split_string_v2(&("\"".to_owned() + as_str + "\""), false)?;

let encoded = bridge_parts[0].clone();

Ok(AstString::Plain(encoded))
}
Expand Down
78 changes: 72 additions & 6 deletions compiler/src/tests/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ fn types_with_reassignment() {
}

#[test]
#[ignore = "2/4/2024 - type updates are no longer in spec"]
#[should_panic = "type mismatch: this assignment will update a variable with type `int`, which is not compatible with the original type `str`"]
fn type_updates() {
eval(
r#"
Expand All @@ -224,9 +224,8 @@ fn type_updates() {
.unwrap()
}

/// `2` is an invalid index into "hi", so this test asserts that types and values are being updated
#[test]
#[ignore = "2/4/2024 - type updates are no longer in spec"]
#[should_panic = "type mismatch: this assignment will update a variable with type `[int...]`, which is not compatible with the original type `str`"]
fn type_updates_index() {
eval(
r#"
Expand All @@ -240,12 +239,12 @@ fn type_updates_index() {
}

#[test]
#[should_panic = "cannot index with `3` into list whose known valid safe indexes are 0..2"]
#[should_panic = "type mismatch: this assignment will update a variable with type `[int, int, int]`, which is not compatible with the original type `str`"]
fn invalid_index() {
eval(
r#"
x = "hi"
x = [1, 2, 3]
const x = [1, 2, 3]
x[3]
"#,
Expand Down Expand Up @@ -470,5 +469,72 @@ fn list_with_optional_class() {
assert cats_str == ["Cat named Muna is a tabby: Cat", "Cat named Scout is a tabby: Cat", "Cat named Odie is a tabby: Cat", "Cat named Air Bud is a tabby: Cat", "Cat named Old Yeller is a tabby: Cat"]
"#,
)
.unwrap();
.unwrap();
}

#[test]
fn escape_sequence_strings() {
eval(
r#"
const MESSAGE = "Santa said, \"Ho Ho Ho!\" (North Pole Chronicle 52).\n\t--12/25/0002"
assert MESSAGE[12] == "\""
const QUOT_ORD: byte = 34.to_byte()
const LF_ORD: byte = 10.to_byte()
const TAB_ORD: byte = 9.to_byte()
assert MESSAGE[12] == QUOT_ORD.to_ascii()
assert MESSAGE.index_of("\n") == 50
assert MESSAGE[50] == LF_ORD.to_ascii()
assert MESSAGE[51] == TAB_ORD.to_ascii()
"#,
)
.unwrap()
}

/// https://github.com/mrodz/mscript/issues/185
#[test]
fn cross_file_escape_behavior() {
EvalEnvironment::entrypoint(
"main.ms",
r#"
import other
const MESSAGE = "hello \"world\""
print MESSAGE == other.MESSAGE
print MESSAGE
print other.MESSAGE
me = other.Person("Billy Bob")
print me.to_str()
assert me.to_str() == "Person {\n\tname: \"Billy Bob\"\n}"
"#,
)
.unwrap()
.add(
"other.ms",
r#"
export const MESSAGE: str = "hello \"world\""
export class Person {
name: str
constructor(self, name: str) {
self.name = name
}
fn to_str(self) -> str {
return "Person {\n\tname: \"" + self.name + "\"\n}"
}
}
"#,
)
.unwrap()
.run()
.unwrap()
}

0 comments on commit ed9ce3d

Please sign in to comment.