diff --git a/include/con4m.h b/include/con4m.h
index e917cf5c..3b51ffdc 100644
--- a/include/con4m.h
+++ b/include/con4m.h
@@ -72,3 +72,7 @@ typedef void *object_t;
 
 // Yes we use cryptographic hashes internally for type IDing.
 #include "crypto/sha.h"
+
+// The front end.
+#include "con4m/frontend/compile.h"
+#include "con4m/frontend/lex.h" // Lexical Tokens
diff --git a/include/con4m/codepoint.h b/include/con4m/codepoint.h
index ddaaf4e7..00bd7aaf 100644
--- a/include/con4m/codepoint.h
+++ b/include/con4m/codepoint.h
@@ -17,12 +17,62 @@ c4m_codepoint_is_space(c4m_codepoint_t cp)
     }
 
     switch (c4m_codepoint_category(cp)) {
-    case CP_CATEGORY_ZS:
+    case UTF8PROC_CATEGORY_ZS:
         return true;
-    case CP_CATEGORY_ZL:
-    case CP_CATEGORY_ZP:
+    case UTF8PROC_CATEGORY_ZL:
+    case UTF8PROC_CATEGORY_ZP:
         return true;
     default:
         return false;
     }
 }
+
+// True when `cp` may begin a con4m identifier: a codepoint in one of
+// the letter categories (Lu, Ll, Lt, Lm, Lo), a letter-number (Nl), or
+// one of the ASCII characters '_', '?' and '$'.
+static inline bool
+c4m_codepoint_is_c4m_id_start(c4m_codepoint_t cp)
+{
+    bool special = (cp == '_') || (cp == '?') || (cp == '$');
+
+    switch (utf8proc_category(cp)) {
+    case UTF8PROC_CATEGORY_LU:
+    case UTF8PROC_CATEGORY_LL:
+    case UTF8PROC_CATEGORY_LT:
+    case UTF8PROC_CATEGORY_LM:
+    case UTF8PROC_CATEGORY_LO:
+    case UTF8PROC_CATEGORY_NL:
+        return true;
+    default:
+        // Not an accepted category; only the special ASCII
+        // characters are still allowed.
+        return special;
+    }
+}
+
+// True when `cp` may appear after the first character of a con4m
+// identifier: everything accepted as an identifier start, plus decimal
+// digits (Nd), nonspacing and spacing-combining marks (Mn, Mc) and
+// connector punctuation (Pc).
+static inline bool
+c4m_codepoint_is_c4m_id_continue(c4m_codepoint_t cp)
+{
+    bool special = (cp == '_') || (cp == '?') || (cp == '$');
+
+    switch (utf8proc_category(cp)) {
+    case UTF8PROC_CATEGORY_LU:
+    case UTF8PROC_CATEGORY_LL:
+    case UTF8PROC_CATEGORY_LT:
+    case UTF8PROC_CATEGORY_LM:
+    case UTF8PROC_CATEGORY_LO:
+    case UTF8PROC_CATEGORY_NL:
+    case UTF8PROC_CATEGORY_ND:
+    case UTF8PROC_CATEGORY_MN:
+    case UTF8PROC_CATEGORY_MC:
+    case UTF8PROC_CATEGORY_PC:
+        return true;
+    default:
+        // Outside the accepted categories; fall back to the ASCII extras.
+        return special;
+    }
+}
diff --git a/include/con4m/conststr.h b/include/con4m/conststr.h
index ab22ba7b..34719927 100644
--- a/include/con4m/conststr.h
+++ b/include/con4m/conststr.h
@@ -15,3 +15,5 @@ extern c4m_utf8_t *c4m_get_lbrace_const();
 extern c4m_utf8_t *c4m_get_rbrace_const();
 extern c4m_utf8_t *c4m_get_colon_const();
 extern c4m_utf8_t *c4m_get_colon_no_space_const();
+extern c4m_utf8_t *c4m_get_slash_const();
+extern c4m_utf8_t *c4m_get_period_const();
diff --git a/include/con4m/datatypes.h b/include/con4m/datatypes.h
index f777d02e..04aec34f 100644
--- a/include/con4m/datatypes.h
+++ b/include/con4m/datatypes.h
@@ -20,6 +20,7 @@
 #include "con4m/datatypes/tuples.h"
 #include "con4m/datatypes/callbacks.h"
 #include "con4m/datatypes/streams.h"
+#include "con4m/datatypes/frontend.h"
 
 typedef c4m_str_t *(*c4m_repr_fn)(c4m_obj_t, to_str_use_t);
 typedef void (*c4m_marshal_fn)(c4m_obj_t,
diff --git a/include/con4m/datatypes/frontend.h b/include/con4m/datatypes/frontend.h
new file mode 100644
index 00000000..27b5e052
--- /dev/null
+++ b/include/con4m/datatypes/frontend.h
@@ -0,0 +1,148 @@
+#pragma once
+#include "con4m.h"
+
+typedef enum { // Token kinds. Order matches the rows of tt_info[] in lex.c; NOTE(review): presumably used as an index there — keep both in sync.
+    c4m_tt_space,
+    c4m_tt_semi,
+    c4m_tt_newline,
+    c4m_tt_line_comment, // tt_info row: "comment"
+    c4m_tt_lock_attr,    // tt_info row: "~"
+    c4m_tt_plus,
+    c4m_tt_minus,
+    c4m_tt_mul,
+    c4m_tt_long_comment, // tt_info row: "comment"
+    c4m_tt_div,
+    c4m_tt_mod,
+    c4m_tt_lte,
+    c4m_tt_lt,
+    c4m_tt_gte,
+    c4m_tt_gt,
+    c4m_tt_neq,
+    c4m_tt_not,
+    c4m_tt_colon,
+    c4m_tt_assign, // tt_info row: "="
+    c4m_tt_cmp,    // tt_info row: "=="
+    c4m_tt_comma,
+    c4m_tt_period,
+    c4m_tt_lbrace,
+    c4m_tt_rbrace,
+    c4m_tt_lbracket,
+    c4m_tt_rbracket,
+    c4m_tt_lparen,
+    c4m_tt_rparen,
+    c4m_tt_and,
+    c4m_tt_or,
+    c4m_tt_int_lit,
+    c4m_tt_hex_lit,
+    c4m_tt_float_lit,
+    c4m_tt_string_lit,
+    c4m_tt_char_lit,
+    c4m_tt_true,
+    c4m_tt_false,
+    c4m_tt_nil,
+    c4m_tt_if,
+    c4m_tt_elif,
+    c4m_tt_else,
+    c4m_tt_for,
+    c4m_tt_from,
+    c4m_tt_to,
+    c4m_tt_break,
+    c4m_tt_continue,
+    c4m_tt_return,
+    c4m_tt_enum,
+    c4m_tt_identifier,
+    c4m_tt_func,
+    c4m_tt_var,
+    c4m_tt_global,
+    c4m_tt_const,
+    c4m_tt_unquoted_lit, // tt_info row: ":= literal"
+    c4m_tt_backtick,
+    c4m_tt_arrow, // tt_info row: "->"
+    c4m_tt_object,
+    c4m_tt_while,
+    c4m_tt_in,
+    c4m_tt_bit_and,
+    c4m_tt_bit_or,
+    c4m_tt_bit_xor,
+    c4m_tt_shl,
+    c4m_tt_shr,
+    c4m_tt_typeof,
+    c4m_tt_switch,
+    c4m_tt_case,
+    c4m_tt_plus_eq,
+    c4m_tt_minus_eq,
+    c4m_tt_mul_eq,
+    c4m_tt_div_eq,
+    c4m_tt_mod_eq,
+    c4m_tt_bit_and_eq,
+    c4m_tt_bit_or_eq,
+    c4m_tt_bit_xor_eq,
+    c4m_tt_shl_eq,
+    c4m_tt_shr_eq,
+    c4m_tt_sof, // tt_info row: "start"
+    c4m_tt_eof,
+    c4m_tt_lex_error // tt_info row: "error"
+} c4m_token_kind_t;
+
+typedef enum { // Error codes stored in c4m_compile_error.code.
+    c4m_err_open_file, // Recorded by c4m_load_code() when opening the source raises.
+    c4m_err_lex_stray_cr,
+    c4m_err_lex_eof_in_comment,
+    c4m_err_lex_invalid_char,
+    c4m_err_lex_eof_in_str_lit,
+    c4m_err_lex_nl_in_str_lit,
+    c4m_err_lex_eof_in_char_lit,
+    c4m_err_lex_nl_in_char_lit,
+    c4m_err_lex_extra_in_char_lit,
+    c4m_err_lex_esc_in_esc,
+    c4m_err_lex_invalid_float_lit,
+    c4m_err_lex_float_oflow,
+    c4m_err_lex_float_uflow,
+    c4m_err_lex_int_oflow,
+    c4m_err_last, // NOTE(review): presumably an end-of-list sentinel, not a real error — confirm.
+} c4m_compile_error_t;
+
+typedef struct { // One token record; kind is a c4m_token_kind_t.
+    c4m_codepoint_t *start_ptr; // NOTE(review): presumably points at the token's first codepoint — confirm.
+    c4m_codepoint_t *end_ptr;   // NOTE(review): inclusive vs. one-past-the-end is not visible here — confirm.
+    c4m_utf32_t     *literal_modifier;
+    void            *literal_value; // Once parsed.
+    c4m_token_kind_t kind;
+    int              token_id;
+    int              line_no;
+    int              line_offset;
+    uint8_t          adjustment; // For keeping track of quoting.
+} c4m_token_t;
+
+typedef struct { // One recorded compile error; instances are appended to c4m_file_compile_ctx.errors.
+    c4m_compile_error_t code; // Which error occurred.
+    // These will probably turn into a tagged union or transparent
+    // pointer with a phase indicator, so we can design the aux data
+    // appropriate per-phase.
+    c4m_token_t        *current_token;
+    c4m_str_t          *exception_message; // c4m_load_code() stores the caught exception's message here.
+} c4m_compile_error;
+
+typedef struct {
+    // The module_id is calculated by combining the package name and the
+    // module name, then hashing it with SHA256. We use Unix style paths
+    // but this is not necessarily derived from the URI path.
+    //
+    // Note that packages (and our combining of it and the module) use
+    // dotted syntax like with most PLs. When we combine for the hash,
+    // we add a dot in there.
+    //
+    // c4m_new_compile_ctx will add __default__ as the package if none
+    // is provided. The URI fields are optional (via API you can just
+    // pass raw source as long as you give at least a module name).
+
+    __int128_t   module_id;
+    c4m_str_t   *scheme;    // http, https or file; if NULL, then file.
+    c4m_str_t   *authority; // http/s only.
+    c4m_str_t   *path;      // Path component in the URI.
+    c4m_str_t   *package;   // Package name.
+    c4m_str_t   *module;    // Module name.
+    c4m_utf32_t *raw;       // raw contents read when we do the lex pass.
+    c4m_xlist_t *tokens;    // an xlist of c4m_token_t objects.
+    c4m_xlist_t *errors;    // an xlist of c4m_compile_error pointers.
+} c4m_file_compile_ctx;
diff --git a/include/con4m/datatypes/grids.h b/include/con4m/datatypes/grids.h
index 903cc679..5e383f8a 100644
--- a/include/con4m/datatypes/grids.h
+++ b/include/con4m/datatypes/grids.h
@@ -173,13 +173,13 @@ typedef struct {
 } c4m_renderable_t;
 
 struct c4m_grid_t {
-    c4m_renderable_t    *self;
-    c4m_renderable_t   **cells; // A 2d array of renderable_objects, by ref
-    uint16_t             num_cols;
-    uint16_t             num_rows;
-    uint16_t             spare_rows;
-    c4m_render_style_t **col_props;
-    c4m_render_style_t **row_props;
+    c4m_renderable_t  *self;
+    c4m_renderable_t **cells; // A 2d array of renderable_objects, by ref
+    uint16_t           num_cols;
+    uint16_t           num_rows;
+    uint16_t           spare_rows;
+    c4m_dict_t        *col_props; // dict of int:c4m_render_style_t **
+    c4m_dict_t        *row_props;
 
     // Per-render info, which includes any adding added to perform
     // alignment of the grid within the dimensions we're given.
diff --git a/include/con4m/frontend/compile.h b/include/con4m/frontend/compile.h
new file mode 100644
index 00000000..4ced7725
--- /dev/null
+++ b/include/con4m/frontend/compile.h
@@ -0,0 +1,9 @@
+#pragma once
+#include "con4m.h"
+
+c4m_file_compile_ctx *_c4m_new_compile_ctx(c4m_str_t *module_name, ...);
+bool                  c4m_validate_module_info(c4m_file_compile_ctx *);
+c4m_stream_t         *c4m_load_code(c4m_file_compile_ctx *);
+
+#define c4m_new_compile_ctx(m, ...) \
+    _c4m_new_compile_ctx(m, KFUNC(__VA_ARGS__))
diff --git a/include/con4m/frontend/lex.h b/include/con4m/frontend/lex.h
new file mode 100644
index 00000000..2eae9928
--- /dev/null
+++ b/include/con4m/frontend/lex.h
@@ -0,0 +1,5 @@
+#pragma once
+#include "con4m.h"
+
+bool        c4m_lex(c4m_file_compile_ctx *, c4m_stream_t *);
+c4m_grid_t *c4m_format_tokens(c4m_file_compile_ctx *);
diff --git a/include/con4m/grid.h b/include/con4m/grid.h
index 51f97a54..3ba83507 100644
--- a/include/con4m/grid.h
+++ b/include/con4m/grid.h
@@ -26,12 +26,20 @@ c4m_get_td_tag(c4m_grid_t *g)
 }
 void c4m_grid_set_all_contents(c4m_grid_t *, flexarray_t *);
 
-extern c4m_grid_t *c4m_grid_flow(uint64_t items, ...);
-c4m_utf32_t       *c4m_grid_to_str(c4m_grid_t *, to_str_use_t);
-extern c4m_grid_t *_c4m_ordered_list(flexarray_t *, ...);
-extern c4m_grid_t *_c4m_unordered_list(flexarray_t *, ...);
-extern c4m_grid_t *_c4m_grid_tree(c4m_tree_node_t *, ...);
-c4m_xlist_t       *_c4m_grid_render(c4m_grid_t *, ...);
+extern c4m_grid_t  *c4m_grid_flow(uint64_t items, ...);
+extern c4m_utf32_t *c4m_grid_to_str(c4m_grid_t *, to_str_use_t);
+extern c4m_grid_t  *_c4m_ordered_list(flexarray_t *, ...);
+extern c4m_grid_t  *_c4m_unordered_list(flexarray_t *, ...);
+extern c4m_grid_t  *_c4m_grid_tree(c4m_tree_node_t *, ...);
+extern c4m_xlist_t *_c4m_grid_render(c4m_grid_t *, ...);
+extern void         c4m_set_column_props(c4m_grid_t *,
+                                         int,
+                                         c4m_render_style_t *);
+extern void         c4m_set_row_props(c4m_grid_t *, // Row counterpart of c4m_set_column_props().
+                                      int,
+                                      c4m_render_style_t *);
+extern void         c4m_set_column_style(c4m_grid_t *, int, char *);
+extern void         c4m_set_row_style(c4m_grid_t *, int, char *);
 
 #define c4m_grid_render(g, ...)    _c4m_grid_render(g, KFUNC(__VA_ARGS__))
 #define c4m_ordered_list(l, ...)   _c4m_ordered_list(l, KFUNC(__VA_ARGS__))
@@ -52,30 +60,6 @@ c4m_to_str_renderable(c4m_str_t *s, char *tag)
                    c4m_kw("obj", c4m_ka(s), "tag", c4m_ka(tag)));
 }
 
-static inline void
-c4m_set_column_style(c4m_grid_t *grid, int col, char *tag)
-{
-    grid->col_props[col] = c4m_lookup_cell_style(tag);
-}
-
-static inline void
-c4m_set_row_style(c4m_grid_t *grid, int row, char *tag)
-{
-    grid->row_props[row] = c4m_lookup_cell_style(tag);
-}
-
-static inline void
-c4m_set_column_props(c4m_grid_t *grid, int col, c4m_render_style_t *s)
-{
-    grid->col_props[col] = s;
-}
-
-static inline void
-c4m_set_row_props(c4m_grid_t *grid, int row, c4m_render_style_t *s)
-{
-    grid->row_props[row] = s;
-}
-
 static inline c4m_style_t
 c4m_grid_blend_color(c4m_style_t style1, c4m_style_t style2)
 {
@@ -183,3 +167,12 @@ c4m_grid_stripe_rows(c4m_grid_t *grid)
 {
     grid->stripe = 1;
 }
+
+#ifdef C4M_USE_INTERNAL_API
+
+static inline c4m_xlist_t *
+c4m_new_table_row()
+{
+    return c4m_new(c4m_tspec_xlist(c4m_tspec_utf32())); // New xlist whose item type spec is utf32.
+}
+#endif
diff --git a/include/con4m/stream.h b/include/con4m/stream.h
index dbb35b98..c64c0245 100644
--- a/include/con4m/stream.h
+++ b/include/con4m/stream.h
@@ -1,15 +1,16 @@
 #pragma once
 #include "con4m.h"
 
-c4m_obj_t c4m_stream_raw_read(c4m_stream_t *, int64_t, char *);
-size_t    c4m_stream_raw_write(c4m_stream_t *, int64_t, char *);
-void      _c4m_stream_write_object(c4m_stream_t *, c4m_obj_t, bool);
-bool      c4m_stream_at_eof(c4m_stream_t *);
-int64_t   c4m_stream_get_location(c4m_stream_t *);
-void      c4m_stream_set_location(c4m_stream_t *, int64_t);
-void      c4m_stream_close(c4m_stream_t *);
-void      c4m_stream_flush(c4m_stream_t *);
-void      _c4m_print(c4m_obj_t, ...);
+extern c4m_obj_t *c4m_stream_raw_read(c4m_stream_t *, int64_t, char *);
+extern size_t     c4m_stream_raw_write(c4m_stream_t *, int64_t, char *);
+extern void       _c4m_stream_write_object(c4m_stream_t *, c4m_obj_t, bool);
+extern bool       c4m_stream_at_eof(c4m_stream_t *);
+extern int64_t    c4m_stream_get_location(c4m_stream_t *);
+extern void       c4m_stream_set_location(c4m_stream_t *, int64_t);
+extern void       c4m_stream_close(c4m_stream_t *);
+extern void       c4m_stream_flush(c4m_stream_t *);
+extern void       _c4m_print(c4m_obj_t, ...);
+extern c4m_obj_t *c4m_stream_read_all(c4m_stream_t *);
 
 #define c4m_stream_write_object(s, o, ...) \
     _c4m_stream_write_object(s, o, IF(ISEMPTY(__VA_ARGS__))(false) __VA_ARGS__)
@@ -108,7 +109,7 @@ buffer_iostream(c4m_buf_t *buf)
 }
 
 static inline c4m_stream_t *
-file_instream(c4m_str_t *filename, c4m_builtin_t output_type)
+c4m_file_instream(c4m_str_t *filename, c4m_builtin_t output_type)
 {
     return c4m_new(c4m_tspec_stream(),
                    c4m_kw("filename",
diff --git a/include/vendor/utf8proc.h b/include/vendor/utf8proc.h
index af5f353d..f25cd51c 100644
--- a/include/vendor/utf8proc.h
+++ b/include/vendor/utf8proc.h
@@ -3,36 +3,36 @@
 #include "con4m/base.h"
 
 typedef enum {
-  CP_CATEGORY_CN  = 0, /**< Other, not assigned */
-  CP_CATEGORY_LU  = 1, /**< Letter, uppercase */
-  CP_CATEGORY_LL  = 2, /**< Letter, lowercase */
-  CP_CATEGORY_LT  = 3, /**< Letter, titlecase */
-  CP_CATEGORY_LM  = 4, /**< Letter, modifier */
-  CP_CATEGORY_LO  = 5, /**< Letter, other */
-  CP_CATEGORY_MN  = 6, /**< Mark, nonspacing */
-  CP_CATEGORY_MC  = 7, /**< Mark, spacing combining */
-  CP_CATEGORY_ME  = 8, /**< Mark, enclosing */
-  CP_CATEGORY_ND  = 9, /**< Number, decimal digit */
-  CP_CATEGORY_NL = 10, /**< Number, letter */
-  CP_CATEGORY_NO = 11, /**< Number, other */
-  CP_CATEGORY_PC = 12, /**< Punctuation, connector */
-  CP_CATEGORY_PD = 13, /**< Punctuation, dash */
-  CP_CATEGORY_PS = 14, /**< Punctuation, open */
-  CP_CATEGORY_PE = 15, /**< Punctuation, close */
-  CP_CATEGORY_PI = 16, /**< Punctuation, initial quote */
-  CP_CATEGORY_PF = 17, /**< Punctuation, final quote */
-  CP_CATEGORY_PO = 18, /**< Punctuation, other */
-  CP_CATEGORY_SM = 19, /**< Symbol, math */
-  CP_CATEGORY_SC = 20, /**< Symbol, currency */
-  CP_CATEGORY_SK = 21, /**< Symbol, modifier */
-  CP_CATEGORY_SO = 22, /**< Symbol, other */
-  CP_CATEGORY_ZS = 23, /**< Separator, space */
-  CP_CATEGORY_ZL = 24, /**< Separator, line */
-  CP_CATEGORY_ZP = 25, /**< Separator, paragraph */
-  CP_CATEGORY_CC = 26, /**< Other, control */
-  CP_CATEGORY_CF = 27, /**< Other, format */
-  CP_CATEGORY_CS = 28, /**< Other, surrogate */
-  CP_CATEGORY_CO = 29, /**< Other, private use */
+    UTF8PROC_CATEGORY_CN = 0,  /**< Other, not assigned */
+    UTF8PROC_CATEGORY_LU = 1,  /**< Letter, uppercase */
+    UTF8PROC_CATEGORY_LL = 2,  /**< Letter, lowercase */
+    UTF8PROC_CATEGORY_LT = 3,  /**< Letter, titlecase */
+    UTF8PROC_CATEGORY_LM = 4,  /**< Letter, modifier */
+    UTF8PROC_CATEGORY_LO = 5,  /**< Letter, other */
+    UTF8PROC_CATEGORY_MN = 6,  /**< Mark, nonspacing */
+    UTF8PROC_CATEGORY_MC = 7,  /**< Mark, spacing combining */
+    UTF8PROC_CATEGORY_ME = 8,  /**< Mark, enclosing */
+    UTF8PROC_CATEGORY_ND = 9,  /**< Number, decimal digit */
+    UTF8PROC_CATEGORY_NL = 10, /**< Number, letter */
+    UTF8PROC_CATEGORY_NO = 11, /**< Number, other */
+    UTF8PROC_CATEGORY_PC = 12, /**< Punctuation, connector */
+    UTF8PROC_CATEGORY_PD = 13, /**< Punctuation, dash */
+    UTF8PROC_CATEGORY_PS = 14, /**< Punctuation, open */
+    UTF8PROC_CATEGORY_PE = 15, /**< Punctuation, close */
+    UTF8PROC_CATEGORY_PI = 16, /**< Punctuation, initial quote */
+    UTF8PROC_CATEGORY_PF = 17, /**< Punctuation, final quote */
+    UTF8PROC_CATEGORY_PO = 18, /**< Punctuation, other */
+    UTF8PROC_CATEGORY_SM = 19, /**< Symbol, math */
+    UTF8PROC_CATEGORY_SC = 20, /**< Symbol, currency */
+    UTF8PROC_CATEGORY_SK = 21, /**< Symbol, modifier */
+    UTF8PROC_CATEGORY_SO = 22, /**< Symbol, other */
+    UTF8PROC_CATEGORY_ZS = 23, /**< Separator, space */
+    UTF8PROC_CATEGORY_ZL = 24, /**< Separator, line */
+    UTF8PROC_CATEGORY_ZP = 25, /**< Separator, paragraph */
+    UTF8PROC_CATEGORY_CC = 26, /**< Other, control */
+    UTF8PROC_CATEGORY_CF = 27, /**< Other, format */
+    UTF8PROC_CATEGORY_CS = 28, /**< Other, surrogate */
+    UTF8PROC_CATEGORY_CO = 29, /**< Other, private use */
 } cp_category_t;
 
 typedef enum {
@@ -42,99 +42,97 @@ typedef enum {
 } lbreak_kind_t;
 
 typedef enum {
-  /** The given UTF-8 input is NULL terminated. */
-  UTF8PROC_NULLTERM  = (1<<0),
-
-  /** Unicode Versioning Stability has to be respected. */
-  UTF8PROC_STABLE    = (1<<1),
-
-  /** Compatibility decomposition (i.e. formatting information is lost). */
-  UTF8PROC_COMPAT    = (1<<2),
-
-  /** Return a result with composed characters. */
-  UTF8PROC_COMPOSE   = (1<<3),
-
-  /** Return a result with decomposed characters. */
-  UTF8PROC_DECOMPOSE = (1<<4),
-
-  /** Strip "default ignorable characters" such as SOFT-HYPHEN or
-   * ZERO-WIDTH-SPACE. */
-  UTF8PROC_IGNORE    = (1<<5),
-
-  /** Return an error, if the input contains unassigned codepoints. */
-  UTF8PROC_REJECTNA  = (1<<6),
-
-  /**
-   * Indicating that NLF-sequences (LF, CRLF, CR, NEL) are representing a
-   * line break, and should be converted to the codepoint for line
-   * separation (LS).
-   */
-  UTF8PROC_NLF2LS    = (1<<7),
-
-  /**
-   * Indicating that NLF-sequences are representing a paragraph break, and
-   * should be converted to the codepoint for paragraph separation
-   * (PS).
-   */
-  UTF8PROC_NLF2PS    = (1<<8),
-
-  /** Indicating that the meaning of NLF-sequences is unknown. */
-  UTF8PROC_NLF2LF    = (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS),
-
-  /** Strips and/or convers control characters.
-   *
-   * NLF-sequences are transformed into space, except if one of the
-   * NLF2LS/PS/LF options is given. HorizontalTab (HT) and FormFeed (FF)
-   * are treated as a NLF-sequence in this case.  All other control
-   * characters are simply removed.
-   */
-  UTF8PROC_STRIPCC   = (1<<9),
-
-  /**
-   * Performs unicode case folding, to be able to do a case-insensitive
-   * string comparison.
-   */
-  UTF8PROC_CASEFOLD  = (1<<10),
-
-  /**
-   * Inserts 0xFF bytes at the beginning of each sequence which is
-   * representing a single grapheme cluster (see UAX#29).
-   */
-  UTF8PROC_CHARBOUND = (1<<11),
-
-  /** Lumps certain characters together.
-   *
-   * E.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-". See lump.md for details.
-   *
-   * If NLF2LF is set, this includes a transformation of paragraph and
-   * line separators to ASCII line-feed (LF).
-   */
-  UTF8PROC_LUMP      = (1<<12),
-
-  /** Strips all character markings.
-   *
-   * This includes non-spacing, spacing and enclosing (i.e. accents).
-   * @note This option works only with @ref UTF8PROC_COMPOSE or
-   *       @ref UTF8PROC_DECOMPOSE
-   */
-  UTF8PROC_STRIPMARK = (1<<13),
-
-  /**
-   * Strip unassigned codepoints.
-   */
-  UTF8PROC_STRIPNA    = (1<<14),
+    /** The given UTF-8 input is NULL terminated. */
+    UTF8PROC_NULLTERM = (1 << 0),
+
+    /** Unicode Versioning Stability has to be respected. */
+    UTF8PROC_STABLE = (1 << 1),
+
+    /** Compatibility decomposition (i.e. formatting information is lost). */
+    UTF8PROC_COMPAT = (1 << 2),
+
+    /** Return a result with composed characters. */
+    UTF8PROC_COMPOSE = (1 << 3),
+
+    /** Return a result with decomposed characters. */
+    UTF8PROC_DECOMPOSE = (1 << 4),
+
+    /** Strip "default ignorable characters" such as SOFT-HYPHEN or
+     * ZERO-WIDTH-SPACE. */
+    UTF8PROC_IGNORE = (1 << 5),
+
+    /** Return an error, if the input contains unassigned codepoints. */
+    UTF8PROC_REJECTNA = (1 << 6),
+
+    /**
+     * Indicating that NLF-sequences (LF, CRLF, CR, NEL) are representing a
+     * line break, and should be converted to the codepoint for line
+     * separation (LS).
+     */
+    UTF8PROC_NLF2LS = (1 << 7),
+
+    /**
+     * Indicating that NLF-sequences are representing a paragraph break, and
+     * should be converted to the codepoint for paragraph separation
+     * (PS).
+     */
+    UTF8PROC_NLF2PS = (1 << 8),
+
+    /** Indicating that the meaning of NLF-sequences is unknown. */
+    UTF8PROC_NLF2LF = (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS),
+
+    /** Strips and/or convers control characters.
+     *
+     * NLF-sequences are transformed into space, except if one of the
+     * NLF2LS/PS/LF options is given. HorizontalTab (HT) and FormFeed (FF)
+     * are treated as a NLF-sequence in this case.  All other control
+     * characters are simply removed.
+     */
+    UTF8PROC_STRIPCC = (1 << 9),
+
+    /**
+     * Performs unicode case folding, to be able to do a case-insensitive
+     * string comparison.
+     */
+    UTF8PROC_CASEFOLD = (1 << 10),
+
+    /**
+     * Inserts 0xFF bytes at the beginning of each sequence which is
+     * representing a single grapheme cluster (see UAX#29).
+     */
+    UTF8PROC_CHARBOUND = (1 << 11),
+
+    /** Lumps certain characters together.
+     *
+     * E.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-". See lump.md for details.
+     *
+     * If NLF2LF is set, this includes a transformation of paragraph and
+     * line separators to ASCII line-feed (LF).
+     */
+    UTF8PROC_LUMP = (1 << 12),
+
+    /** Strips all character markings.
+     *
+     * This includes non-spacing, spacing and enclosing (i.e. accents).
+     * @note This option works only with @ref UTF8PROC_COMPOSE or
+     *       @ref UTF8PROC_DECOMPOSE
+     */
+    UTF8PROC_STRIPMARK = (1 << 13),
+
+    /**
+     * Strip unassigned codepoints.
+     */
+    UTF8PROC_STRIPNA = (1 << 14),
 } utf8proc_option_t;
 
 // From libutf8proc
-extern int utf8proc_iterate(const uint8_t *str, ssize_t len, int32_t *cp);
-extern bool utf8proc_codepoint_valid(int32_t cp);
-extern int utf8proc_encode_char(int32_t cp, uint8_t *dst);
+extern int           utf8proc_iterate(const uint8_t *str, ssize_t len, int32_t *cp);
+extern bool          utf8proc_codepoint_valid(int32_t cp);
+extern int           utf8proc_encode_char(int32_t cp, uint8_t *dst);
 extern cp_category_t utf8proc_category(int32_t cp);
-extern int utf8proc_charwidth(int32_t cp);
-extern int32_t utf8proc_tolower(int32_t cp);
-extern int32_t utf8proc_toupper(int32_t cp);
-extern int utf8proc_charwidth(int32_t cp);
-extern bool utf8proc_grapheme_break_stateful(int32_t cp1, int32_t cp2,
-					     int32_t *state);
-extern int32_t utf8proc_map(const uint8_t *str, int32_t len, uint8_t **out,
-			    utf8proc_option_t options);
+extern int           utf8proc_charwidth(int32_t cp);
+extern int32_t       utf8proc_tolower(int32_t cp);
+extern int32_t       utf8proc_toupper(int32_t cp);
+extern int           utf8proc_charwidth(int32_t cp);
+extern bool          utf8proc_grapheme_break_stateful(int32_t cp1, int32_t cp2, int32_t *state);
+extern int32_t       utf8proc_map(const uint8_t *str, int32_t len, uint8_t **out, utf8proc_option_t options);
diff --git a/meson.build b/meson.build
index 2241d260..930b002d 100644
--- a/meson.build
+++ b/meson.build
@@ -86,6 +86,8 @@ c4m_src  = ['src/con4m/style.c',
             'src/con4m/literals.c',
             'src/con4m/init.c',                        
             'src/con4m/crypto/sha.c',
+            'src/con4m/frontend/compile.c',
+            'src/con4m/frontend/lex.c',
             ]
 
 hat_primary = ['src/hatrack/support/hatrack_common.c',
diff --git a/src/con4m/ansi.c b/src/con4m/ansi.c
index 1e4410ac..65adf06d 100644
--- a/src/con4m/ansi.c
+++ b/src/con4m/ansi.c
@@ -11,11 +11,11 @@ ignore_for_printing(c4m_codepoint_t cp)
     // control rendering as intended.
 
     switch (cp) {
-    case CP_CATEGORY_CN:
-    case CP_CATEGORY_CC:
-    case CP_CATEGORY_CF:
-    case CP_CATEGORY_CS:
-    case CP_CATEGORY_CO:
+    case UTF8PROC_CATEGORY_CN:
+    case UTF8PROC_CATEGORY_CC:
+    case UTF8PROC_CATEGORY_CF:
+    case UTF8PROC_CATEGORY_CS:
+    case UTF8PROC_CATEGORY_CO:
         if (cp == '\n') {
             return false;
         }
diff --git a/src/con4m/breaks.c b/src/con4m/breaks.c
index f82d3e33..fd496de0 100644
--- a/src/con4m/breaks.c
+++ b/src/con4m/breaks.c
@@ -91,8 +91,8 @@ internal_is_line_break(int32_t cp)
     }
 
     switch (utf8proc_category(cp)) {
-    case CP_CATEGORY_ZL:
-    case CP_CATEGORY_ZP:
+    case UTF8PROC_CATEGORY_ZL:
+    case UTF8PROC_CATEGORY_ZP:
         return true;
     default:
         return false;
diff --git a/src/con4m/conststr.c b/src/con4m/conststr.c
index 12d0f44a..2f5c532a 100644
--- a/src/con4m/conststr.c
+++ b/src/con4m/conststr.c
@@ -14,7 +14,9 @@ enum {
     RBRACE_IX = 10,
     COLON_IX  = 11,
     COLON_NSP = 12,
-    PUNC_MAX  = 13
+    SLASH_IX  = 13,
+    PERIOD_IX = 14,
+    PUNC_MAX  = 15
 };
 
 static c4m_str_t *type_punct[PUNC_MAX] = {
@@ -41,6 +43,10 @@ init_punctuation()
         type_punct[COLON_IX]  = c4m_new(c4m_tspec_utf8(),
                                        c4m_kw("cstring", c4m_ka(" : ")));
         type_punct[COLON_NSP] = c4m_utf8_repeat(':', 1);
+        type_punct[SLASH_IX]  = c4m_utf8_repeat('/', 1);
+        // PERIOD_IX must be populated as well: c4m_get_period_const()
+        // hands this slot straight to its callers.
+        type_punct[PERIOD_IX] = c4m_utf8_repeat('.', 1);
     }
     c4m_gc_register_root(&type_punct[0], PUNC_MAX);
 }
@@ -135,3 +141,19 @@ c4m_get_colon_no_space_const()
     init_punctuation();
     return type_punct[COLON_NSP];
 }
+
+// Returns the shared "/" string constant.
+c4m_utf8_t *
+c4m_get_slash_const()
+{
+    init_punctuation();
+    return type_punct[SLASH_IX];
+}
+
+// Returns the shared "." string constant.
+c4m_utf8_t *
+c4m_get_period_const()
+{
+    init_punctuation();
+    return type_punct[PERIOD_IX];
+}
diff --git a/src/con4m/frontend/compile.c b/src/con4m/frontend/compile.c
new file mode 100644
index 00000000..f1420122
--- /dev/null
+++ b/src/con4m/frontend/compile.c
@@ -0,0 +1,159 @@
+#include "con4m.h"
+
+// Derives a module name from a path: takes the last '/'-separated piece
+// and cuts it at the position c4m_str_find() reports for "." (if any).
+static c4m_str_t *
+module_name_from_path(c4m_str_t *path)
+{
+    c4m_xlist_t *pieces    = c4m_str_xsplit(path, c4m_get_slash_const());
+    int          count     = c4m_xlist_len(pieces);
+    c4m_str_t   *base_name = c4m_xlist_get(pieces, count - 1, NULL);
+    int          dot_ix    = c4m_str_find(base_name, c4m_get_period_const());
+
+    if (dot_ix != -1) {
+        return c4m_str_slice(base_name, 0, dot_ix);
+    }
+    return base_name;
+}
+
+c4m_file_compile_ctx *
+_c4m_new_compile_ctx(c4m_str_t *module_name, ...)
+{
+    c4m_file_compile_ctx *result;
+    c4m_str_t            *scheme    = NULL;
+    c4m_str_t            *authority = NULL;
+    c4m_str_t            *path      = NULL;
+    c4m_str_t            *package   = NULL;
+    // Optional keywords: uri_scheme, uri_authority, uri_path, package.
+    c4m_karg_only_init(module_name);
+    c4m_kw_ptr("uri_scheme", scheme);
+    c4m_kw_ptr("uri_authority", authority);
+    c4m_kw_ptr("uri_path", path);
+    c4m_kw_ptr("package", package);
+    // No package supplied: fall back to "__default__".
+    if (package == NULL) {
+        package = c4m_new(c4m_tspec_utf8(),
+                          c4m_kw("cstring", c4m_ka("__default__")));
+    }
+    // No module name supplied: derive one from the path, if there is one.
+    if (module_name == NULL && path != NULL) {
+        module_name = module_name_from_path(path);
+    }
+
+    result            = c4m_gc_alloc(c4m_file_compile_ctx);
+    result->errors    = c4m_new(c4m_tspec_xlist(c4m_tspec_ref()));
+    result->scheme    = scheme;
+    result->authority = authority;
+    result->path      = path;
+    result->package   = package;
+    result->module    = module_name;
+    // Raises when the package / module names fail validation.
+    if (!c4m_validate_module_info(result)) {
+        C4M_CRAISE(
+            "Invalid module spec; the packages and the module name "
+            "must all be valid identifiers; package parts must be "
+            "separated by dots.");
+    }
+
+    return result;
+}
+
+// Checks the names held in `ctx`: the package must be one or more
+// identifiers separated by single dots, and the module must be one
+// identifier. Returns false when either is NULL, empty or malformed.
+bool
+c4m_validate_module_info(c4m_file_compile_ctx *ctx)
+{
+    c4m_codepoint_t cp;
+
+    if (ctx->package == NULL || ctx->module == NULL) {
+        return false;
+    }
+
+    int  plen     = c4m_str_codepoint_len(ctx->package);
+    int  mlen     = c4m_str_codepoint_len(ctx->module);
+    bool prev_dot = false; // Was the previous package char a '.'?
+
+    if (plen == 0 || mlen == 0) {
+        return false;
+    }
+
+    cp = c4m_index(ctx->package, 0);
+    if (!c4m_codepoint_is_c4m_id_start(cp)) {
+        return false;
+    }
+
+    cp = c4m_index(ctx->module, 0);
+    if (!c4m_codepoint_is_c4m_id_start(cp)) {
+        return false;
+    }
+
+    for (int i = 1; i < plen; i++) {
+        cp = c4m_index(ctx->package, i);
+
+        // A part that follows a dot is an identifier of its own, so it
+        // must open with an id-start character ("a.1b", "a..b" fail).
+        if (prev_dot ? c4m_codepoint_is_c4m_id_start(cp)
+                     : c4m_codepoint_is_c4m_id_continue(cp)) {
+            prev_dot = false;
+            continue;
+        }
+
+        // Otherwise only a dot is legal: not doubled, not the last char.
+        if (cp != '.' || prev_dot || i + 1 == plen) {
+            return false;
+        }
+
+        prev_dot = true;
+    }
+
+    for (int i = 1; i < mlen; i++) {
+        cp = c4m_index(ctx->module, i);
+
+        if (!c4m_codepoint_is_c4m_id_continue(cp)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Opens the source named by ctx->path and returns a stream over it.
+//
+// If opening raises (the source not being found, or some other IO
+// error), the failure is appended to ctx->errors with the code
+// c4m_err_open_file and NULL is returned. If you call it wrong at the
+// API level (non-NULL scheme, or NULL path), it raises an exception.
+//
+// Currently, this is only handling files on the local file system; need
+// to add an API for easier http/https access.
+c4m_stream_t *
+c4m_load_code(c4m_file_compile_ctx *ctx)
+{
+    c4m_stream_t *result;
+
+    if (ctx->scheme != NULL) {
+        C4M_CRAISE("Non-file URI schemes are currently unimplemented.");
+    }
+
+    if (!ctx->path) {
+        C4M_CRAISE("Do not call with a null path.");
+    }
+    // Turn an exception from opening the file into a recorded error.
+    C4M_TRY
+    {
+        result = c4m_file_instream(ctx->path, C4M_T_UTF8);
+    }
+    C4M_EXCEPT
+    {
+        c4m_compile_error *err = c4m_gc_alloc(c4m_compile_error);
+        err->code              = c4m_err_open_file;
+        err->exception_message = c4m_exception_get_message(C4M_X_CUR());
+
+        c4m_xlist_append(ctx->errors, err);
+        result = NULL;
+    }
+    C4M_TRY_END;
+
+    return result;
+}
diff --git a/src/con4m/frontend/lex.c b/src/con4m/frontend/lex.c
new file mode 100644
index 00000000..ea30058d
--- /dev/null
+++ b/src/con4m/frontend/lex.c
@@ -0,0 +1,1178 @@
+#define C4M_USE_INTERNAL_API
+#include <con4m.h>
+
+typedef struct {
+    char *tt_name;       // Text shown in the "Type" column by c4m_format_tokens().
+    bool  show_contents; // If true, c4m_format_tokens() also shows the token's source text.
+} internal_tt_info_t;
+
+static internal_tt_info_t tt_info[] = { // Indexed by (int)tok->kind in c4m_format_tokens(), so row order is significant.
+    {"space", false},
+    {";", false},
+    {"newline", false},
+    {"comment", true},
+    {"~", false},
+    {"+", false},
+    {"-", false},
+    {"*", false},
+    {"comment", true},
+    {"/", false},
+    {"%", false},
+    {"<=", false},
+    {"<", false},
+    {">=", false},
+    {">", false},
+    {"!=", false},
+    {"!", false},
+    {":", false},
+    {"=", false},
+    {"==", false},
+    {",", false},
+    {".", false},
+    {"{", false},
+    {"}", false},
+    {"[", false},
+    {"]", false},
+    {"(", false},
+    {")", false},
+    {"and", false},
+    {"or", false},
+    {"int", true},
+    {"hex", true},
+    {"float", true},
+    {"string", true},
+    {"char", true},
+    {"true", false},
+    {"false", false},
+    {"nil", false},
+    {"if", false},
+    {"elif", false},
+    {"else", false},
+    {"for", false},
+    {"from", false},
+    {"to", false},
+    {"break", false},
+    {"continue", false},
+    {"return", false},
+    {"enum", false},
+    {"identifier", true},
+    {"func", false},
+    {"var", false},
+    {"global", false},
+    {"const", false},
+    {":= literal", true},
+    {"`", false},
+    {"->", false},
+    {"object", false},
+    {"while", false},
+    {"in", false},
+    {"&", false},
+    {"|", false},
+    {"^", false},
+    {"<<", false},
+    {">>", false},
+    {"typeof", false},
+    {"switch", false},
+    {"case", false},
+    {"+=", false},
+    {"-=", false},
+    {"*=", false},
+    {"/=", false},
+    {"%=", false},
+    {"&=", false},
+    {"|=", false},
+    {"^=", false},
+    {"<<=", false},
+    {">>=", false},
+    {"start", false},
+    {"eof", false},
+    {"error", false},
+}; // NOTE(review): the token-kind enum is declared elsewhere; confirm it lists kinds in this same order.
+
+typedef struct {
+    c4m_file_compile_ctx *ctx;             // Receives the tokens and errors.
+    c4m_codepoint_t      *start;           // First codepoint of the current token.
+    c4m_codepoint_t      *end;             // One past the last input codepoint.
+    c4m_codepoint_t      *pos;             // Cursor: next codepoint to be read.
+    c4m_codepoint_t      *line_start;      // Where the current line begins.
+    c4m_token_t          *last_token;      // Most recent output_token() result.
+    size_t                token_id;        // Number of tokens emitted so far.
+    size_t                line_no;         // Current line; c4m_lex() starts at 1.
+    size_t                cur_tok_line_no; // line_no when the current token began.
+    size_t                cur_tok_offset;  // start - line_start at that moment.
+} lex_state_t;
+
+// These helpers assume a variable named `state` is in scope, so names
+// must stay consistent internally. They just remove call-site clutter:
+// - TOK adds a token of the given kind to the output stream;
+// - LITERAL_TOK is the same, except it then looks for a literal
+//   modifier after the token; if there is one, it is attached to the
+//   token that was just added;
+// - LEX_ERROR adds an error to the broader context object, and raises.
+// The multi-statement helpers are wrapped in do { } while (0) so that
+// each expands to exactly one statement, even under an unbraced `if`.
+#define TOK(kind) output_token(state, kind)
+#define LITERAL_TOK(kind) \
+    do { output_token(state, kind); handle_lit_mod(state); } while (0)
+#define LEX_ERROR(code)                                                     \
+    do { fill_lex_error(state, code); printf("Raising exception: " #code); \
+         C4M_CRAISE("Exception:" #code "\n"); } while (0)
+
+static const __uint128_t max_intval = (__uint128_t)0xffffffffffffffffULL; // Int literals above this (2^64 - 1) are a lex error.
+
+static inline c4m_codepoint_t
+next(lex_state_t *state)
+{
+    // Consume one codepoint and hand it back. Once the input is used up,
+    // the cursor stays where it is and 0 is returned instead.
+    bool exhausted = state->pos >= state->end;
+    return exhausted ? 0 : *state->pos++;
+}
+
+static inline void
+unput(lex_state_t *state) // Steps the cursor back by one codepoint.
+{
+    if (state->pos && state->pos < state->end) { // NOTE(review): no-op when pos == end, even if next() just consumed the last codepoint.
+        --state->pos;
+    }
+}
+
+static inline void
+advance(lex_state_t *state) // Moves the cursor forward one codepoint.
+{
+    state->pos++; // Unconditional: there is no check against state->end.
+}
+
+static inline c4m_codepoint_t
+peek(lex_state_t *state)
+{
+    // Return the codepoint under the cursor without consuming it, or 0 at
+    // end of input. The bound must match next(): pos == end - 1 is still a
+    // readable codepoint, so only pos >= end counts as exhausted.
+    return (state->pos >= state->end) ? 0 : *state->pos;
+}
+
+static inline void
+at_new_line(lex_state_t *state)
+{
+    state->line_start = state->pos; // Token columns are measured from here.
+    state->line_no += 1;            // One more line break has been seen.
+}
+
+static inline void
+output_token(lex_state_t *state, c4m_token_kind_t kind)
+{
+    c4m_token_t *tok  = c4m_gc_alloc(c4m_token_t);
+    tok->kind         = kind;
+    tok->start_ptr    = state->start; // Token text is [start_ptr, end_ptr).
+    tok->end_ptr      = state->pos;
+    tok->token_id     = ++state->token_id; // Pre-increment: ids begin at 1.
+    tok->line_no      = state->cur_tok_line_no;
+    tok->line_offset  = state->cur_tok_offset;
+    state->last_token = tok; // handle_lit_mod() attaches modifiers to this.
+    // Tokens accumulate on the compile context in emission order.
+    c4m_xlist_append(state->ctx->tokens, tok);
+}
+
+static inline void
+skip_optional_newline(lex_state_t *state)
+{
+    // Swallow blanks plus at most one newline and emit them as a single
+    // c4m_tt_space token. Re-anchor the token start first, so that token
+    // describes the whitespace rather than the token emitted before it.
+    state->start           = state->pos;
+    state->cur_tok_line_no = state->line_no;
+    state->cur_tok_offset  = state->start - state->line_start;
+    while (true) {
+        switch (peek(state)) {
+        case ' ':
+        case '\t':
+            advance(state);
+            continue;
+        case '\n':
+            advance(state);
+            at_new_line(state);
+            // Only one newline is allowed; after it, just eat blanks.
+            while (true) {
+                switch (peek(state)) {
+                case ' ':
+                case '\t':
+                    advance(state);
+                    continue;
+                default:
+                    goto possible_ws_token;
+                }
+            }
+        default:
+possible_ws_token:
+            if (state->pos != state->start) {
+                TOK(c4m_tt_space);
+            }
+            return;
+        }
+    }
+}
+
+static inline void
+handle_lit_mod(lex_state_t *state)
+{
+    // A literal modifier is a ' directly after the token just emitted,
+    // followed by a run of identifier-continue codepoints.
+    if (peek(state) != '\'') {
+        return;
+    }
+    advance(state);
+    c4m_codepoint_t *lm_start = state->pos;
+    while (c4m_codepoint_is_c4m_id_continue(peek(state))) {
+        advance(state);
+    }
+    // Pass the codepoints too, so the modifier's text reaches the string.
+    size_t n = (size_t)(state->pos - lm_start);
+    state->last_token->literal_modifier = c4m_new(
+        c4m_tspec_utf32(),
+        c4m_kw("length", c4m_ka(n), "codepoints", c4m_ka(lm_start)));
+    state->start = state->pos;
+}
+
+static inline void
+fill_lex_error(lex_state_t *state, c4m_compile_error_t code)
+// Records a lex error on the compile context; raising is left to the caller.
+{
+    c4m_token_t *tok = c4m_gc_alloc(c4m_token_t);
+    tok->kind        = c4m_tt_lex_error;
+    tok->start_ptr   = state->start;
+    tok->end_ptr     = state->pos;
+    tok->line_no     = state->line_no; // Line where the error was detected.
+    tok->line_offset = state->start - state->line_start; // NOTE(review): line_start may be past start for multi-line tokens.
+    // This token is attached to the error only, not to ctx->tokens.
+    c4m_compile_error *err = c4m_gc_alloc(c4m_compile_error);
+    err->code              = code;
+    err->current_token     = tok;
+
+    c4m_xlist_append(state->ctx->errors, err);
+}
+
+static inline void
+scan_unquoted_literal(lex_state_t *state)
+{
+    // For now, this just scans to the end of the line, and returns a
+    // token of type c4m_tt_unquoted_lit.  When it comes time to
+    // re-implement the litmod stuff and we add literal parsers for
+    // all the builtins, this can generate the proper token up-front.
+    while (true) {
+        switch (next(state)) { // Any other codepoint is simply consumed.
+        case '\n': // Already consumed, so the newline is inside the token.
+            at_new_line(state);
+            // fallthrough.
+        case 0: // End of input terminates the literal too.
+            LITERAL_TOK(c4m_tt_unquoted_lit);
+            return;
+        }
+    }
+}
+
+static void
+scan_int_or_float_literal(lex_state_t *state)
+{
+    // This one probably does make more sense to fully parse here.
+    // There is an issue:
+    //
+    // We're using u32 as our internal repr for what we're parsing.
+    // But the easiest way to deal w/ floats is to call strtod(),
+    // which expects UTF8 (well, ASCII really). We don't want to
+    // reconvert (or keep around) the whole remainder of the file, so
+    // we just scan forward looking at absolutely every character that
+    // can possibly be in a valid float (including E/e, but not NaN /
+    // infinity; those will have to be handled as keywords).  We
+    // convert that bit back to UTF-8.
+    //
+    // If we did see a starting character that indicates a float, we
+    // know it might be a float, so we keep a record of where the
+    // first such character is; then we call strtod(); if strtod()
+    // tells us it found a valid parse where the ending point is
+    // farther than the first float indicator, then we're
+    // done; we just need to set the proper token end point.
+    //
+    // Otherwise, we re-parse as an int, and we can just do that
+    // manually into an __int128_t (getting the float parse precisely
+    // right is not something I relish, even though it can be done
+    // faster than w/ strtod).
+    //
+    // One final note: we already passed the first character before we
+    // got here. But state->start does point to the beginning, so we
+    // use that when we need to reconstruct the string.
+
+    c4m_codepoint_t *start    = state->start;
+    int              ix       = 1; // First index we need to check.
+    int              float_ix = 0; // 0 means not a float.
+    // The candidate text must stay inside the input buffer.
+    while (start + ix < state->end) {
+        switch (start[ix]) {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            ix++;
+            continue;
+        case '.':
+            if (float_ix) {
+                // Already had a dot or something like that.
+                break;
+            }
+            float_ix = ix++;
+            continue;
+        case 'e':
+        case 'E':
+            if (!float_ix) {
+                float_ix = ix;
+            }
+            ix++;
+            continue;
+        case '+':
+        case '-':
+            ix++;
+            continue;
+        default:
+            break;
+        }
+        break;
+    }
+
+    c4m_utf32_t *u32 = c4m_new(c4m_tspec_utf32(),
+                               c4m_kw("length",
+                                      c4m_ka(ix),
+                                      "codepoints",
+                                      c4m_ka(start)));
+    c4m_utf8_t  *u8  = c4m_to_utf8(u32);
+
+    if (float_ix) {
+        char *endp = NULL;
+        errno      = 0; // strtod() only ever sets errno; clear stale values.
+        double value = strtod((char *)u8->data, &endp);
+        if (endp == (char *)u8->data || !endp) {
+            // I don't think this one should ever happen here.
+            LEX_ERROR(c4m_err_lex_invalid_float_lit);
+        }
+
+        if (errno == ERANGE) {
+            if (value == HUGE_VAL) {
+                LEX_ERROR(c4m_err_lex_float_oflow);
+            }
+            LEX_ERROR(c4m_err_lex_float_uflow);
+        }
+
+        int float_strlen = (int)(endp - u8->data);
+        if (float_strlen > float_ix) {
+            state->pos = state->start + float_strlen;
+            LITERAL_TOK(c4m_tt_float_lit);
+            state->last_token->literal_value = (void *)*(uint64_t *)&value;
+            return;
+        }
+    }
+
+    // Either we saw no evidence of a float or the float parse
+    // didn't get to any of that evidence, so voila, it's an int token.
+
+    __int128_t val  = 0;
+    int        i    = 0;
+    size_t     slen = c4m_str_byte_len(u8);
+    char      *p    = (char *)u8->data;
+
+    for (; i < (int64_t)slen; i++) {
+        char c = *p++;
+
+        switch (c) {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            val *= 10;
+            val += c - '0';
+            if (val > (uint64_t)max_intval) {
+                LEX_ERROR(c4m_err_lex_int_oflow);
+            }
+            continue;
+        default:
+            goto finished_int;
+        }
+    }
+finished_int: {
+    uint64_t n = (uint64_t)val;
+    state->pos = state->start + i;
+    LITERAL_TOK(c4m_tt_int_lit);
+    state->last_token->literal_value = (void *)n;
+    return;
+}
+}
+
+static inline void
+scan_hex_literal(lex_state_t *state) // Consumes a run of hex digits, then emits c4m_tt_hex_lit.
+{
+    while (true) {
+        switch (peek(state)) {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+        case 'a':
+        case 'b':
+        case 'c':
+        case 'd':
+        case 'e':
+        case 'f':
+        case 'A':
+        case 'B':
+        case 'C':
+        case 'D':
+        case 'E':
+        case 'F':
+            advance(state);
+            continue;
+        default: // Anything else, an 'x' radix marker included, ends the run.
+            LITERAL_TOK(c4m_tt_hex_lit);
+            return;
+        }
+    }
+}
+
+// This only gets called if we already passed a leading 0. So we
+// inspect the next char; if it's an 'x' or 'X', we consume it and go
+// the hex route, since scan_hex_literal() accepts only hex digits.
+// Otherwise, we go the int route, which may promote to float.
+static inline void
+scan_int_float_or_hex_literal(lex_state_t *state)
+{
+    switch (peek(state)) {
+    case 'x':
+    case 'X':
+        advance(state);
+        scan_hex_literal(state);
+        return;
+    default:
+        scan_int_or_float_literal(state);
+        return;
+    }
+}
+
+static inline void
+scan_tristring(lex_state_t *state)
+{
+    // Here, we already got 3 quotes. We now need to:
+    // 1. Keep track of line numbers when we see newlines.
+    // 2. Skip whatever codepoint follows a backslash.
+    // 3. Count consecutive quotes.
+    // 4. Error when we get to EOF.
+
+    int quote_count = 0;
+
+    while (true) {
+        switch (next(state)) {
+        case 0:
+            LEX_ERROR(c4m_err_lex_eof_in_str_lit); // Raises, so no fallthrough happens.
+        case '\n':
+            at_new_line(state);
+            break;
+        case '\\':
+            advance(state); // NOTE(review): a skipped newline is not passed to at_new_line().
+            break;
+        case '"':
+            if (++quote_count == 3) {
+                LITERAL_TOK(c4m_tt_string_lit);
+                state->last_token->adjustment = 3; // presumably the delimiter width to trim; confirm with the consumer.
+                return;
+            }
+            continue; // breaking would reset quote count.
+        default:
+            break;
+        }
+        quote_count = 0; // Reached by every 'break' above: the quote run was interrupted.
+    }
+}
+
+static void
+scan_string_literal(lex_state_t *state)
+{
+    // This function only finds the end of the string and keeps track
+    // of line numbers; it does not otherwise attempt to handle any
+    // parsing of the string itself.
+    //
+    // That could either be done after we've seen if there's a lit mod,
+    // or wait until the parser or ir generator need the data;
+    //
+    // My choice is to do it as late as possible, because we could
+    // then allow people to register litmods and then use them in the
+    // same source file (or a dependent source file) if done properly.
+
+    // Here, we know we already passed one double-quote character. We
+    // must first determine if we're looking at a tristring.
+    if (peek(state) == '"') {
+        advance(state);
+        if (peek(state) != '"') {
+            // empty string.
+            goto finish_single_quote;
+        }
+        advance(state);
+        scan_tristring(state);
+        return;
+    }
+
+    while (true) {
+        c4m_codepoint_t c = next(state);
+
+        switch (c) {
+        case 0:
+            LEX_ERROR(c4m_err_lex_eof_in_str_lit); // Raises, so no fallthrough happens.
+        case '\n':
+        case '\r':
+            LEX_ERROR(c4m_err_lex_nl_in_str_lit);
+        case '\\':
+            // Skip absolutely anything that comes next,
+            // including a newline.
+            advance(state); // NOTE(review): a skipped newline is not passed to at_new_line().
+            continue;
+        case '"':
+finish_single_quote:
+            LITERAL_TOK(c4m_tt_string_lit);
+            state->last_token->adjustment = 1; // presumably the delimiter width to trim; confirm with the consumer.
+            return;
+        default:
+            continue;
+        }
+    }
+}
+
+// Char literals can be:
+// 1. a single character
+// 2. \x, \X, \u, \U .. They're all the same. We scan till ' or some
+//    error condition (which includes another \).
+//    We don't check the value at this point; default char type will
+//    error if it's outside the range of valid unicode. We don't even
+//    check for it being valid hex; we just scan it.
+//    w/ \u and \U I'll probably accept an optional + after the U since
+//    officially that's what the unicode consortium does.
+// 3. \ followed by any single character.
+// -1. If we get a newline or null, it's an error.
+// Also, if we get anything after it other than a ', it's an error.
+//
+// Note specifically that we do NOT turn this into a real char literal
+// here. We wait till needed, so we can apply literal modifiers.
+static void
+scan_char_literal(lex_state_t *state) // Entered after the opening ' has been consumed.
+{
+    switch (next(state)) {
+    case 0:
+        LEX_ERROR(c4m_err_lex_eof_in_char_lit); // Raises, so no fallthrough happens.
+    case '\r':
+    case '\n':
+        LEX_ERROR(c4m_err_lex_nl_in_char_lit);
+    case '\'':
+        return; // NOTE(review): '' returns without emitting a token or recording an error.
+    case '\\':
+        switch (next(state)) {
+        case 'x':
+        case 'X':
+        case 'u':
+        case 'U':
+            while (true) { // Consume everything up to the closing quote; digits are not validated here.
+                switch (next(state)) {
+                case 0:
+                    LEX_ERROR(c4m_err_lex_eof_in_char_lit);
+                case '\r':
+                case '\n':
+                    LEX_ERROR(c4m_err_lex_nl_in_char_lit);
+                case '\\':
+                    LEX_ERROR(c4m_err_lex_esc_in_esc);
+                case '\'':
+                    goto finish_up;
+                }
+            }
+        default: // Any other escaped codepoint: drops into the outer default below.
+            break;
+        }
+    default: // A single (possibly escaped) codepoint was consumed.
+        break;
+    }
+    if (next(state) != '\'') { // The very next codepoint must close the literal.
+        LEX_ERROR(c4m_err_lex_extra_in_char_lit);
+    }
+
+finish_up:
+    LITERAL_TOK(c4m_tt_char_lit);
+    state->last_token->adjustment = 1; // presumably the delimiter width to trim; confirm with the consumer.
+    return;
+}
+
+static c4m_dict_t *keywords = NULL; // Keyword text -> token kind; filled once by init_keywords().
+
+static inline void
+add_keyword(char *keyword, c4m_token_kind_t kind) // Adds one keyword -> kind entry to `keywords`.
+{
+    c4m_utf8_t *s = c4m_new(c4m_tspec_utf8(),
+                            c4m_kw("cstring", c4m_ka(keyword)));
+    hatrack_dict_add(keywords, s, (void *)(int64_t)kind); // The kind is stored directly in the value slot.
+}
+
+static inline void
+init_keywords(void)
+{
+    if (keywords != NULL) {
+        return;
+    }
+    // Root the slot before allocating, so a collection triggered while
+    // the table is being filled cannot lose track of the dictionary.
+    c4m_gc_register_root(&keywords, 1);
+    keywords = c4m_new(c4m_tspec_dict(c4m_tspec_utf32(), c4m_tspec_i64()));
+
+    add_keyword("True", c4m_tt_true);
+    add_keyword("true", c4m_tt_true);
+    add_keyword("False", c4m_tt_false);
+    add_keyword("false", c4m_tt_false);
+    add_keyword("nil", c4m_tt_nil);
+    add_keyword("in", c4m_tt_in);
+    add_keyword("var", c4m_tt_var);
+    add_keyword("global", c4m_tt_global);
+    add_keyword("const", c4m_tt_const);
+    add_keyword("is", c4m_tt_cmp);
+    add_keyword("and", c4m_tt_and);
+    add_keyword("or", c4m_tt_or);
+    add_keyword("not", c4m_tt_not);
+    add_keyword("if", c4m_tt_if);
+    add_keyword("elif", c4m_tt_elif);
+    add_keyword("else", c4m_tt_else);
+    add_keyword("case", c4m_tt_case);
+    add_keyword("for", c4m_tt_for);
+    add_keyword("while", c4m_tt_while);
+    add_keyword("from", c4m_tt_from);
+    add_keyword("to", c4m_tt_to);
+    add_keyword("break", c4m_tt_break);
+    add_keyword("continue", c4m_tt_continue);
+    add_keyword("return", c4m_tt_return);
+    add_keyword("enum", c4m_tt_enum);
+    add_keyword("func", c4m_tt_func);
+    add_keyword("object", c4m_tt_object);
+    add_keyword("typeof", c4m_tt_typeof);
+    add_keyword("switch", c4m_tt_switch);
+    add_keyword("infinity", c4m_tt_float_lit);
+    add_keyword("NaN", c4m_tt_float_lit);
+}
+
+static void
+scan_id_or_keyword(lex_state_t *state)
+{
+    init_keywords();
+
+    // The caller already consumed the id_start codepoint; extend the token
+    // over the id_continue codepoints that follow. The cursor is bounded by
+    // state->end directly, since unput() cannot back up once the cursor
+    // has reached the end of the input.
+    while (state->pos < state->end
+           && c4m_codepoint_is_c4m_id_continue(*state->pos)) {
+        state->pos++;
+    }
+
+    bool    found  = false;
+    int64_t length = (int64_t)(state->pos - state->start);
+
+    if (length == 0) {
+        return;
+    }
+
+    c4m_utf32_t *as_u32 = c4m_new(
+        c4m_tspec_utf32(),
+        c4m_kw("codepoints",
+               c4m_ka(state->start),
+               "length",
+               c4m_ka(length)));
+
+    c4m_token_kind_t r = (c4m_token_kind_t)(int64_t)hatrack_dict_get(
+        keywords,
+        c4m_to_utf8(as_u32),
+        &found);
+
+    if (!found) {
+        TOK(c4m_tt_identifier);
+        return;
+    }
+
+    switch (r) {
+    case c4m_tt_true:
+    case c4m_tt_false:
+        LITERAL_TOK(r);
+        return;
+    case c4m_tt_float_lit: {
+        c4m_utf32_t *u32 = c4m_new(
+            c4m_tspec_utf32(),
+            c4m_kw("length",
+                   c4m_ka((int64_t)(state->pos - state->start)),
+                   "codepoints",
+                   c4m_ka(state->start)));
+
+        c4m_utf8_t *u8    = c4m_to_utf8(u32);
+        double      value = strtod((char *)u8->data, NULL);
+
+        LITERAL_TOK(r);
+        state->last_token->literal_value = *(void **)&value;
+        return;
+    }
+    default:
+        TOK(r);
+        return;
+    }
+}
+
+static void
+lex(lex_state_t *state)
+{
+    while (true) {
+        c4m_codepoint_t c;
+        c4m_codepoint_t tmp;
+        // Each pass anchors the next token at the cursor, reads one
+        // codepoint and dispatches on it. Jumping to the label below is
+        // the same as 'continue', but also works from inside the nested
+        // loops and switches. Returns once the eof token is emitted;
+        // lexical errors leave through LEX_ERROR, which raises.
+lex_next_token:
+        state->start           = state->pos;
+        state->cur_tok_line_no = state->line_no;
+        state->cur_tok_offset  = state->start - state->line_start;
+        c                      = next(state);
+
+        switch (c) {
+        case 0:
+            TOK(c4m_tt_eof);
+            return;
+        case ' ':
+        case '\t':
+            while (true) {
+                switch (peek(state)) {
+                case ' ':
+                case '\t':
+                    advance(state);
+                    continue;
+                default:
+                    TOK(c4m_tt_space); // Emit the run before leaving the loop.
+                    goto lex_next_token;
+                }
+            }
+        case '\r':
+            tmp = next(state);
+            if (tmp != '\n') {
+                LEX_ERROR(c4m_err_lex_stray_cr);
+            }
+            // Fallthrough if no exception got raised.
+        case '\n':
+            TOK(c4m_tt_newline);
+            at_new_line(state);
+            continue;
+        case '#':
+            // Line comments go to EOF or new line, and we include the
+            // newline in the token. Double-slash comments work in con4m
+            // too; if we see that, the lexer jumps back up here once it
+            // advances past the second slash.
+line_comment:
+            while (true) {
+                switch (next(state)) {
+                case '\n':
+                    at_new_line(state);
+                    // fallthrough
+                case 0: // EOF ends the comment too; the eof token comes next.
+                    TOK(c4m_tt_line_comment);
+                    goto lex_next_token;
+                default:
+                    continue;
+                }
+            }
+        case '~':
+            TOK(c4m_tt_lock_attr);
+            continue;
+        case '`':
+            TOK(c4m_tt_backtick);
+            continue;
+        case '+':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_plus_eq);
+            }
+            else {
+                TOK(c4m_tt_plus);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '-':
+            switch (peek(state)) {
+            case '=':
+                advance(state);
+                TOK(c4m_tt_minus_eq);
+                break;
+            case '>':
+                advance(state);
+                TOK(c4m_tt_arrow);
+                break;
+            default:
+                TOK(c4m_tt_minus);
+                break;
+            }
+            skip_optional_newline(state);
+            continue;
+        case '*':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_mul_eq);
+            }
+            else {
+                TOK(c4m_tt_mul);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '/':
+            switch (peek(state)) {
+            case '=':
+                advance(state);
+                TOK(c4m_tt_div_eq);
+                skip_optional_newline(state);
+                break;
+            case '/':
+                advance(state);
+                goto line_comment;
+            case '*':
+                advance(state);
+                while (true) {
+                    switch (next(state)) {
+                    case '\n':
+                        at_new_line(state);
+                        continue;
+                    case '*':
+                        if (peek(state) == '/') {
+                            advance(state);
+                            TOK(c4m_tt_long_comment);
+                            goto lex_next_token;
+                        }
+                        continue;
+                    case 0:
+                        LEX_ERROR(c4m_err_lex_eof_in_comment);
+                    default:
+                        continue;
+                    }
+                }
+            default:
+                TOK(c4m_tt_div);
+                skip_optional_newline(state);
+                break;
+            }
+            continue;
+        case '%':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_mod_eq);
+            }
+            else {
+                TOK(c4m_tt_mod);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '<':
+            switch (peek(state)) {
+            case '=':
+                advance(state);
+                TOK(c4m_tt_lte);
+                break;
+            case '<':
+                advance(state);
+                if (peek(state) == '=') {
+                    advance(state);
+                    TOK(c4m_tt_shl_eq);
+                }
+                else {
+                    TOK(c4m_tt_shl);
+                }
+                break;
+            default:
+                TOK(c4m_tt_lt);
+                break;
+            }
+            skip_optional_newline(state);
+            continue;
+        case '>':
+            switch (peek(state)) {
+            case '=':
+                advance(state);
+                TOK(c4m_tt_gte);
+                break;
+            case '>':
+                advance(state);
+                if (peek(state) == '=') {
+                    advance(state);
+                    TOK(c4m_tt_shr_eq);
+                }
+                else {
+                    TOK(c4m_tt_shr);
+                }
+                break;
+            default:
+                TOK(c4m_tt_gt);
+                break;
+            }
+            skip_optional_newline(state);
+            continue;
+        case '!':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_neq);
+            }
+            else {
+                TOK(c4m_tt_not);
+            }
+            skip_optional_newline(state);
+            continue;
+        case ';':
+            TOK(c4m_tt_semi);
+            continue;
+        case ':':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_assign);
+                state->start           = state->pos;
+                state->cur_tok_line_no = state->line_no;
+                state->cur_tok_offset  = state->start - state->line_start;
+                scan_unquoted_literal(state);
+            }
+            else {
+                TOK(c4m_tt_colon);
+            }
+            continue;
+        case '=':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_cmp);
+            }
+            else {
+                TOK(c4m_tt_assign);
+            }
+            skip_optional_newline(state);
+            continue;
+        case ',':
+            TOK(c4m_tt_comma);
+            skip_optional_newline(state);
+            continue;
+        case '.':
+            TOK(c4m_tt_period);
+            skip_optional_newline(state);
+            continue;
+        case '{':
+            TOK(c4m_tt_lbrace);
+            skip_optional_newline(state);
+            continue;
+        case '}':
+            LITERAL_TOK(c4m_tt_rbrace);
+            continue;
+        case '[':
+            TOK(c4m_tt_lbracket);
+            skip_optional_newline(state);
+            continue;
+        case ']':
+            LITERAL_TOK(c4m_tt_rbracket);
+            continue;
+        case '(':
+            TOK(c4m_tt_lparen);
+            skip_optional_newline(state);
+            continue;
+        case ')':
+            LITERAL_TOK(c4m_tt_rparen);
+            continue;
+        case '&':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_bit_and_eq);
+            }
+            else {
+                TOK(c4m_tt_bit_and);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '|':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_bit_or_eq);
+            }
+            else {
+                TOK(c4m_tt_bit_or);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '^':
+            if (peek(state) == '=') {
+                advance(state);
+                TOK(c4m_tt_bit_xor_eq);
+            }
+            else {
+                TOK(c4m_tt_bit_xor);
+            }
+            skip_optional_newline(state);
+            continue;
+        case '0':
+            scan_int_float_or_hex_literal(state);
+            continue;
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            scan_int_or_float_literal(state);
+            continue;
+        case '\'':
+            scan_char_literal(state);
+            continue;
+        case '"':
+            scan_string_literal(state);
+            continue;
+        default:
+            if (!c4m_codepoint_is_c4m_id_start(c)) {
+                LEX_ERROR(c4m_err_lex_invalid_char);
+            }
+            scan_id_or_keyword(state);
+            continue;
+        }
+    }
+}
+
+bool
+c4m_lex(c4m_file_compile_ctx *ctx, c4m_stream_t *stream)
+{
+    int outkind;
+    outkind = stream->flags & (C4M_F_STREAM_UTF8_OUT | C4M_F_STREAM_UTF32_OUT);
+    // The whole stream is read up front and lexed as one UTF-32 buffer.
+    c4m_obj_t   *raw = c4m_stream_read_all(stream);
+    c4m_utf32_t *utf32;
+    lex_state_t  lex_info = {
+         .token_id   = 0,
+         .line_no    = 1,
+         .line_start = 0,
+         .ctx        = ctx,
+    };
+    // A NULL read and empty input both return false before ctx->tokens is set.
+    if (raw == NULL) {
+        return false;
+    }
+
+    switch (outkind) { // Normalize whatever the stream produced to UTF-32.
+    case C4M_F_STREAM_UTF32_OUT:
+        utf32 = (c4m_str_t *)raw;
+
+        if (c4m_str_codepoint_len(utf32) == 0) {
+            return false;
+        }
+        break;
+    case C4M_F_STREAM_UTF8_OUT:
+        if (c4m_str_codepoint_len((c4m_utf8_t *)raw) == 0) {
+            return false;
+        }
+
+        utf32 = c4m_to_utf32((c4m_utf8_t *)raw);
+        break;
+    default:
+        // A buffer object, which we assume is utf8.
+        if (c4m_buffer_len((c4m_buf_t *)raw) == 0) {
+            return false;
+        }
+        utf32 = c4m_to_utf32(c4m_buf_to_utf8_string((c4m_buf_t *)raw));
+        break;
+    }
+    // Tokens point into this buffer, so ctx->raw keeps hold of it.
+    int len             = c4m_str_codepoint_len(utf32);
+    ctx->raw            = utf32;
+    ctx->tokens         = c4m_new(c4m_tspec_xlist(c4m_tspec_ref()));
+    lex_info.start      = (c4m_codepoint_t *)utf32->data;
+    lex_info.pos        = (c4m_codepoint_t *)utf32->data;
+    lex_info.line_start = (c4m_codepoint_t *)utf32->data;
+    lex_info.end        = &((c4m_codepoint_t *)(utf32->data))[len];
+
+    bool error = false;
+
+    C4M_TRY
+    {
+        lex(&lex_info);
+    }
+    C4M_EXCEPT
+    {
+        error = true; // An exception raised during lexing becomes a false return.
+    }
+    C4M_TRY_END;
+
+    return !error; // True when lex() finished without raising.
+}
+
+// Renders ctx->tokens as a grid with one header row and one row per
+// token, for debugging. This starts out without any focus on color or
+// other highlighting; a facility for styling can be added later.
+c4m_grid_t *
+c4m_format_tokens(c4m_file_compile_ctx *ctx)
+{
+    c4m_grid_t *grid = c4m_new(c4m_tspec_grid(),
+                               c4m_kw("start_cols",
+                                      c4m_ka(5),
+                                      "header_rows",
+                                      c4m_ka(1),
+                                      "stripe",
+                                      c4m_ka(true)));
+
+    c4m_xlist_t *row = c4m_new_table_row();
+    int64_t      len = c4m_xlist_len(ctx->tokens);
+    // Header row: five columns, matching start_cols above.
+    c4m_xlist_append(row, c4m_rich_lit("Seq #"));
+    c4m_xlist_append(row, c4m_rich_lit("Type"));
+    c4m_xlist_append(row, c4m_rich_lit("Line #"));
+    c4m_xlist_append(row, c4m_rich_lit("Column #"));
+    c4m_xlist_append(row, c4m_rich_lit("Value"));
+    c4m_grid_add_row(grid, row);
+
+    for (int64_t i = 0; i < len; i++) {
+        c4m_token_t *tok     = c4m_xlist_get(ctx->tokens, i, NULL);
+        int          info_ix = (int)tok->kind; // Row in tt_info; there is no bounds check.
+
+        row = c4m_new_table_row();
+        c4m_xlist_append(row, c4m_str_from_int(i + 1)); // 1-based list position, not tok->token_id.
+        c4m_xlist_append(row, c4m_rich_lit(tt_info[info_ix].tt_name));
+        c4m_xlist_append(row, c4m_str_from_int(tok->line_no));
+        c4m_xlist_append(row, c4m_str_from_int(tok->line_offset));
+
+        if (tt_info[info_ix].show_contents) {
+            c4m_xlist_append( // Value column: the token's text, [start_ptr, end_ptr).
+                row,
+                c4m_new(c4m_tspec_utf32(),
+                        c4m_kw("length",
+                               c4m_ka((int64_t)(tok->end_ptr - tok->start_ptr)),
+                               "codepoints",
+                               c4m_ka(tok->start_ptr))));
+        }
+        else {
+            c4m_xlist_append(row, c4m_rich_lit(" ")); // Placeholder so every row has five cells.
+        }
+
+        c4m_grid_add_row(grid, row);
+    }
+
+    return grid;
+}
diff --git a/src/con4m/grid.c b/src/con4m/grid.c
index 19dc9569..91a814e3 100644
--- a/src/con4m/grid.c
+++ b/src/con4m/grid.c
@@ -188,13 +188,6 @@ c4m_expand_columns(c4m_grid_t *grid, uint64_t num)
         }
     }
 
-    c4m_render_style_t **col_props = c4m_gc_array_alloc(c4m_render_style_t *,
-                                                        new_cols);
-
-    for (int i = 0; i < grid->num_cols; i++) {
-        col_props[i] = grid->col_props[i];
-    }
-
     // This needs a lock.
     grid->cells    = cells;
     grid->num_cols = new_cols;
@@ -217,13 +210,6 @@ c4m_grid_expand_rows(c4m_grid_t *grid, uint64_t num)
         cells[i] = grid->cells[i];
     }
 
-    c4m_render_style_t **row_props = c4m_gc_array_alloc(c4m_render_style_t *,
-                                                        new_rows);
-
-    for (int i = 0; i < grid->num_rows; i++) {
-        row_props[i] = grid->row_props[i];
-    }
-
     grid->cells    = cells;
     grid->num_rows = new_rows;
 }
@@ -285,7 +271,8 @@ c4m_grid_add_row(c4m_grid_t *grid, c4m_obj_t container)
         for (int i = 0; i < grid->num_cols; i++) {
             c4m_obj_t x = c4m_xlist_get((c4m_xlist_t *)container, i, NULL);
             if (x == NULL) {
-                x = (c4m_obj_t)c4m_new(c4m_tspec_utf8(), c4m_kw("cstring", c4m_ka(" ")));
+                x = (c4m_obj_t)c4m_new(c4m_tspec_utf8(),
+                                       c4m_kw("cstring", c4m_ka(" ")));
             }
             c4m_grid_set_cell_contents(grid, grid->row_cursor, i, x);
         }
@@ -376,17 +363,8 @@ grid_init(c4m_grid_t *grid, va_list args)
                                             c4m_ka(grid)));
     grid->self             = self;
 
-    grid->col_props = c4m_gc_array_alloc(c4m_render_style_t *, grid->num_cols);
-    grid->row_props = c4m_gc_array_alloc(c4m_render_style_t *,
-                                         grid->num_rows + spare_rows);
-
-    for (int i = 0; i < min(header_rows, start_rows); i++) {
-        c4m_set_row_style(grid, i, "th");
-    }
-
-    for (int i = 0; i < min(header_cols, start_cols); i++) {
-        c4m_set_column_style(grid, i, "th");
-    }
+    grid->col_props = NULL;
+    grid->row_props = NULL;
 
     grid->header_rows = header_rows;
     grid->header_cols = header_cols;
@@ -395,32 +373,42 @@ grid_init(c4m_grid_t *grid, va_list args)
 static inline c4m_render_style_t *
 get_row_props(c4m_grid_t *grid, int row)
 {
-    if (!grid->row_props[row]) {
-        if (grid->stripe) {
-            if (row % 2) {
-                return c4m_lookup_cell_style("tr.even");
-            }
-            else {
-                return c4m_lookup_cell_style("tr.odd");
-            }
+    c4m_render_style_t *result;
+
+    if (grid->row_props != NULL) { // A style stored for this row wins.
+        result = hatrack_dict_get(grid->row_props, (void *)(int64_t)row, NULL);
+        if (result != NULL) {
+            return result;
         }
+    }
 
-        return c4m_lookup_cell_style("tr");
+    if (grid->stripe) { // Nothing stored: fall back to the shared tr styles.
+        if (row % 2) {
+            return c4m_lookup_cell_style("tr.even");
+        }
+        else {
+            return c4m_lookup_cell_style("tr.odd");
+        }
     }
     else {
-        return grid->row_props[row];
+        return c4m_lookup_cell_style("tr");
     }
 }
 
 static inline c4m_render_style_t *
 get_col_props(c4m_grid_t *grid, int col)
 {
-    if (!grid->col_props[col]) {
-        return c4m_lookup_cell_style("td");
-    }
-    else {
-        return grid->col_props[col];
+    c4m_render_style_t *result;
+
+    if (grid->col_props != NULL) { // A style stored for this column wins.
+        result = hatrack_dict_get(grid->col_props, (void *)(int64_t)col, NULL);
+
+        if (result != NULL) {
+            return result;
+        }
     }
+
+    return c4m_lookup_cell_style("td"); // Nothing stored: shared td style.
 }
 
 // Contents currently must be a list[list[c4m_obj_t]].  Supply
@@ -1726,7 +1714,7 @@ _c4m_ordered_list(flexarray_t *items, ...)
     width += bp->left_pad + bp->right_pad;
     bp->dims.units = width;
 
-    res->col_props[0] = bp;
+    c4m_set_column_props(res, 0, bp);
     c4m_set_column_style(res, 1, item_style);
 
     for (int i = 0; i < n; i++) {
@@ -1774,7 +1762,7 @@ _c4m_unordered_list(flexarray_t *items, ...)
     c4m_render_style_t *bp = c4m_lookup_cell_style(bullet_style);
     bp->dims.units += bp->left_pad + bp->right_pad;
 
-    res->col_props[0] = bp;
+    c4m_set_column_props(res, 0, bp);
     c4m_set_column_style(res, 1, item_style);
 
     for (int i = 0; i < n; i++) {
@@ -1885,13 +1873,8 @@ c4m_grid_marshal(c4m_grid_t   *grid,
     c4m_marshal_cstring(grid->td_tag_name, s);
     c4m_marshal_cstring(grid->th_tag_name, s);
 
-    for (int i = 0; i < grid->num_cols; i++) {
-        c4m_sub_marshal(grid->col_props[i], s, memos, mid);
-    }
-
-    for (int i = 0; i < grid->num_rows; i++) {
-        c4m_sub_marshal(grid->row_props[i], s, memos, mid);
-    }
+    c4m_sub_marshal(grid->col_props, s, memos, mid);
+    c4m_sub_marshal(grid->row_props, s, memos, mid);
 
     for (int i = 0; i < num_cells; i++) {
         c4m_sub_marshal((c4m_renderable_t *)grid->cells[i], s, memos, mid);
@@ -1915,23 +1898,14 @@ c4m_grid_unmarshal(c4m_grid_t *grid, c4m_stream_t *s, c4m_dict_t *memos)
     grid->stripe      = c4m_unmarshal_i8(s);
     grid->td_tag_name = c4m_unmarshal_cstring(s);
     grid->th_tag_name = c4m_unmarshal_cstring(s);
+    grid->col_props   = c4m_sub_unmarshal(s, memos);
+    grid->row_props   = c4m_sub_unmarshal(s, memos);
 
     size_t num_cells = (grid->num_rows + grid->spare_rows) * grid->num_cols;
     grid->cells      = c4m_gc_array_alloc(c4m_renderable_t *, num_cells);
-    grid->col_props  = c4m_gc_array_alloc(c4m_render_style_t *, grid->num_cols);
-    grid->row_props  = c4m_gc_array_alloc(c4m_render_style_t *,
-                                         grid->num_rows + grid->spare_rows);
 
     num_cells = grid->num_rows * grid->num_cols;
 
-    for (int i = 0; i < grid->num_cols; i++) {
-        grid->col_props[i] = c4m_sub_unmarshal(s, memos);
-    }
-
-    for (int i = 0; i < grid->num_rows; i++) {
-        grid->row_props[i] = c4m_sub_unmarshal(s, memos);
-    }
-
     for (size_t i = 0; i < num_cells; i++) {
         grid->cells[i] = c4m_sub_unmarshal(s, memos);
     }
@@ -2093,6 +2067,52 @@ build_tree_output(c4m_tree_node_t *node, tree_fmt_t *info)
     info->padstr = prev_pad;
 }
 
+// Binds style `s` to column `col`; the lookup dict is created on first use.
+void
+c4m_set_column_props(c4m_grid_t *grid, int col, c4m_render_style_t *s)
+{
+    if (!grid->col_props) {
+        grid->col_props = c4m_new(c4m_tspec_dict(c4m_tspec_int(),
+                                                 c4m_tspec_ref()));
+    }
+    hatrack_dict_put(grid->col_props, (void *)(int64_t)col, s);
+}
+
+// Binds style `s` to row `row`; the lookup dict is created on first use.
+void
+c4m_set_row_props(c4m_grid_t *grid, int row, c4m_render_style_t *s)
+{
+    if (!grid->row_props) {
+        grid->row_props = c4m_new(c4m_tspec_dict(c4m_tspec_int(),
+                                                 c4m_tspec_ref()));
+    }
+    hatrack_dict_put(grid->row_props, (void *)(int64_t)row, s);
+}
+
+// Looks up the cell style named `tag` and installs it for column `col`.
+void
+c4m_set_column_style(c4m_grid_t *grid, int col, char *tag)
+{
+    c4m_render_style_t *found = c4m_lookup_cell_style(tag);
+
+    if (found == NULL) {
+        C4M_CRAISE("Style not found.");
+    }
+    c4m_set_column_props(grid, col, found);
+}
+
+// Looks up the cell style named `tag` and installs it for row `row`.
+void
+c4m_set_row_style(c4m_grid_t *grid, int row, char *tag)
+{
+    c4m_render_style_t *found = c4m_lookup_cell_style(tag);
+
+    if (found == NULL) {
+        C4M_CRAISE("Style not found.");
+    }
+    c4m_set_row_props(grid, row, found);
+}
+
 // This currently expects a tree[utf8] or tree[utf32].  Eventually
 // maybe would make it handle anything via it's repr.  However, it
 // should also be restructured to be a single renderable item itself,
diff --git a/src/con4m/numbers.c b/src/con4m/numbers.c
index 1f990dd3..96ef8b65 100644
--- a/src/con4m/numbers.c
+++ b/src/con4m/numbers.c
@@ -450,13 +450,18 @@ u64_parse(char                 *s,
 }
 
 c4m_obj_t
-f64_parse(char *s, c4m_lit_syntax_t st, char *litmod, c4m_lit_error_code_t *code)
+f64_parse(char                 *s,
+          c4m_lit_syntax_t      st,
+          char                 *litmod,
+          c4m_lit_error_code_t *code)
 {
     char   *end;
     double *lit = c4m_new(c4m_tspec_f64());
     double  d   = strtod(s, &end);
 
+    // strtod() leaves `end` at the first unparsed character. Fail if
+    // nothing was consumed, or if anything other than the NUL follows.
     if (end == s || *end) {
         *code = LE_InvalidChar;
         return NULL;
     }
diff --git a/src/con4m/object.c b/src/con4m/object.c
index 39a8b285..58dc3657 100644
--- a/src/con4m/object.c
+++ b/src/con4m/object.c
@@ -348,6 +348,16 @@ const c4m_dt_info_t c4m_base_type_info[C4M_NUM_BUILTIN_DTS] = {
         .dt_kind = C4M_DT_KIND_func,
     },
     {
+        // The idea from the library level behind refs is that they
+        // will always be pointers, but perhaps not even to one of our
+        // heaps.
+        //
+        // We need to take this into account if we need to dereference
+        // something here. Currently, this is only used for holding
+        // non-objects internally.
+        //
+        // Once we add proper references to the language, we might split
+        // out such internal references, IDK.
         .name      = "ref",
         .alloc_len = sizeof(void *),
         .ptr_info  = GC_SCAN_ALL,
@@ -356,6 +366,10 @@ const c4m_dt_info_t c4m_base_type_info[C4M_NUM_BUILTIN_DTS] = {
         .hash_fn   = HATRACK_DICT_KEY_TYPE_OBJ_PTR,
     },
     {
+        // This is meant for runtime sum types. It's lightly used
+        // internally, and we may want to do something more
+        // sophisticated when deciding how to support this in the
+        // language proper.
         .name      = "mixed",
         .typeid    = C4M_T_GENERIC,
         .alloc_len = sizeof(c4m_mixed_t),
diff --git a/src/con4m/streams.c b/src/con4m/streams.c
index 8c55bbfc..c1221a70 100644
--- a/src/con4m/streams.c
+++ b/src/con4m/streams.c
@@ -334,7 +334,7 @@ c4m_stream_bytes_to_output(int64_t flags, char *buf, int64_t len)
 // marshal, so we don't have to go through an object to read out
 // things like ints that we plan on returning.
 
-c4m_obj_t
+c4m_obj_t *
 c4m_stream_raw_read(c4m_stream_t *stream, int64_t len, char *buf)
 {
     // If a buffer is provided, return the length and write into
@@ -385,6 +385,54 @@ c4m_stream_raw_read(c4m_stream_t *stream, int64_t len, char *buf)
     }
 }
 
+// Drains `stream`: calls c4m_stream_raw_read() until it hands back a
+// zero-length result, then joins the pieces into one object. The UTF8 /
+// UTF32 output flags select a utf8 string, a utf32 string, or (neither
+// flag set) a buffer.
+// NOTE(review): with both flags set, the list is created buffer-typed yet
+// the pieces are handled as strings; confirm that combination is invalid.
+c4m_obj_t *
+c4m_stream_read_all(c4m_stream_t *stream)
+{
+    int          mode = stream->flags
+                      & (C4M_F_STREAM_UTF8_OUT | C4M_F_STREAM_UTF32_OUT);
+    c4m_xlist_t *chunks;
+
+    // The chunk list's item type follows the stream's output mode.
+    if (mode == C4M_F_STREAM_UTF8_OUT) {
+        chunks = c4m_new(c4m_tspec_xlist(c4m_tspec_utf8()));
+    }
+    else if (mode == C4M_F_STREAM_UTF32_OUT) {
+        chunks = c4m_new(c4m_tspec_xlist(c4m_tspec_utf32()));
+    }
+    else {
+        chunks = c4m_new(c4m_tspec_xlist(c4m_tspec_buffer()));
+    }
+
+    // Ask for PIPE_BUF at a time; an empty piece ends the loop.
+    for (;;) {
+        c4m_obj_t *piece = c4m_stream_raw_read(stream, PIPE_BUF, NULL);
+        bool empty = mode ? c4m_str_codepoint_len((c4m_str_t *)piece) == 0
+                          : c4m_buffer_len((c4m_buf_t *)piece) == 0;
+
+        if (empty) {
+            break;
+        }
+        c4m_xlist_append(chunks, piece);
+    }
+
+    if (!mode) {
+        return (c4m_obj_t *)c4m_buffer_join(chunks, NULL);
+    }
+
+    c4m_str_t *joined = c4m_str_join(chunks, c4m_empty_string());
+
+    if (mode == C4M_F_STREAM_UTF8_OUT) {
+        return (c4m_obj_t *)c4m_to_utf8(joined);
+    }
+    return (c4m_obj_t *)c4m_to_utf32(joined);
+}
+
 size_t
 c4m_stream_raw_write(c4m_stream_t *stream, int64_t len, char *buf)
 {
diff --git a/src/con4m/string.c b/src/con4m/string.c
index 769efca1..c1d070cd 100644
--- a/src/con4m/string.c
+++ b/src/con4m/string.c
@@ -420,6 +420,8 @@ c4m_to_utf8(const c4m_utf32_t *inp)
         outloc += l;
     }
 
+    res->byte_len = (int32_t)(outloc - (uint8_t *)res->data);
+
     c4m_copy_style_info(inp, res);
 
     return res;
diff --git a/src/tests/test.c b/src/tests/test.c
index c4f0c413..80e1b77c 100644
--- a/src/tests/test.c
+++ b/src/tests/test.c
@@ -577,6 +577,22 @@ c4m_rich_lit_test()
     c4m_print(test, test, c4m_kw("no_color", c4m_ka(true), "sep", c4m_ka('&')));
 }
 
+// Smoke test: lex ../tests/modparam.c4m and print the resulting token table.
+void
+test_lex()
+{
+    c4m_str_t *path   = c4m_rich_lit("../tests/modparam.c4m");
+    c4m_str_t *module = c4m_rich_lit("test1");
+
+    c4m_file_compile_ctx *cctx = c4m_new_compile_ctx(
+        module,
+        c4m_kw("uri_path", c4m_ka(path)));
+    c4m_stream_t *src = c4m_load_code(cctx);
+
+    c4m_lex(cctx, src);
+    c4m_print(c4m_format_tokens(cctx));
+}
+
 int
 main(int argc, char **argv, char **envp)
 {
@@ -614,6 +630,8 @@ main(int argc, char **argv, char **envp)
         c4m_rich_lit_test();
         c4m_print(c4m_box_u32((int32_t)-1));
         c4m_print(c4m_box_i32((int32_t)-1));
+
+        test_lex();
         C4M_STATIC_ASCII_STR(local_test, "Goodbye!");
         // c4m_ansi_render(local_test, sout);
         c4m_print((c4m_obj_t *)local_test);
diff --git a/tests/abort.c4m b/tests/abort.c4m
new file mode 100644
index 00000000..e6a99cfe
--- /dev/null
+++ b/tests/abort.c4m
@@ -0,0 +1,4 @@
+x = 1
+x += 1
+x += 1
+abort()
\ No newline at end of file
diff --git a/tests/assert.c4m b/tests/assert.c4m
new file mode 100644
index 00000000..bc9cb2ce
--- /dev/null
+++ b/tests/assert.c4m
@@ -0,0 +1,3 @@
+assert 100 > 88
+assert 100 < 88
+assert true != false
diff --git a/tests/assignops.c4m b/tests/assignops.c4m
new file mode 100644
index 00000000..8c46abb2
--- /dev/null
+++ b/tests/assignops.c4m
@@ -0,0 +1,12 @@
+x = 2
+x += 2
+x -= 2
+assert x == 2
+
+for i from 0 to 10 {
+  x += 2
+  assert $len == 10
+}
+
+assert x == 22
+
diff --git a/tests/attrs.c4m b/tests/attrs.c4m
new file mode 100644
index 00000000..95c2b226
--- /dev/null
+++ b/tests/attrs.c4m
@@ -0,0 +1,8 @@
+foo.bar = "hello"
+assert foo.bar == "hello"
+foo.bar = "goodbye"
+assert foo.bar == "goodbye"
+boo.hoo = [0,1,2,3]
+assert boo.hoo[2] == 2
+boo.hoo[2] = 4
+assert boo.hoo[2] == 4
diff --git a/tests/breaks.c4m b/tests/breaks.c4m
new file mode 100644
index 00000000..0b7d1c17
--- /dev/null
+++ b/tests/breaks.c4m
@@ -0,0 +1,8 @@
+x = 12
+x = x +
+/* Some
+Long
+Comment */
+y = x
+
+print(y)
\ No newline at end of file
diff --git a/tests/builtins.c4m b/tests/builtins.c4m
new file mode 100644
index 00000000..476ad34f
--- /dev/null
+++ b/tests/builtins.c4m
@@ -0,0 +1,6 @@
+x = "Hello, world!"'h1
+
+print(x)
+print(repr(x))
+print(osname())
+print(arch())
\ No newline at end of file
diff --git a/tests/cb.c4m b/tests/cb.c4m
new file mode 100644
index 00000000..4083409b
--- /dev/null
+++ b/tests/cb.c4m
@@ -0,0 +1,10 @@
+func somethingcool(x: int) {
+  return true
+}
+
+x = func somethingcool(int) -> bool
+y = func print(`x) -> void
+
+assert repr(x) == "func somethingcool(int) -> bool"
+assert repr(y) == "func con4m_print(`x) -> void"
+assert x(2) == true
diff --git a/tests/crash.c4m b/tests/crash.c4m
new file mode 100644
index 00000000..a9949e12
--- /dev/null
+++ b/tests/crash.c4m
@@ -0,0 +1,5 @@
+func crasher(x) {
+ return crasher(x + 1)
+}
+
+crasher(1)
\ No newline at end of file
diff --git a/tests/dict.c4m b/tests/dict.c4m
new file mode 100644
index 00000000..ecf5d357
--- /dev/null
+++ b/tests/dict.c4m
@@ -0,0 +1,9 @@
+x = { "foo" : "Bar" }
+x["bar"] = "Foo"
+print(x["bar"])
+print(x["foo"])
+print(x["boz"])
+
+
+
+
diff --git a/tests/docstrings.c4m b/tests/docstrings.c4m
new file mode 100644
index 00000000..dc19062e
--- /dev/null
+++ b/tests/docstrings.c4m
@@ -0,0 +1,35 @@
+"""
+This is my module.
+Here is one of its two doc strings.
+"""
+"Here's it's other doc."
+
+extern callecho(a: ptr) -> cvoid {
+  "This has some docs too."
+  
+  local: print(x: string) -> void
+  pure: false
+}
+
+extern echoanint(a: cint) -> cvoid {
+  local: print(x: int) -> void
+  pure: false
+}
+
+func fib(x) {
+  "Also doc'd."
+  "And double doc'd."
+  
+  switch x {
+    case 0:
+        return 0
+    case 1:
+        x = x - x + 1
+        return x
+    else:
+        return fib(x - 2) + fib(x - 1)
+  }
+}
+
+x = fib(10)
+assert x == 55
diff --git a/tests/extern2.c4m b/tests/extern2.c4m
new file mode 100644
index 00000000..2b5972d3
--- /dev/null
+++ b/tests/extern2.c4m
@@ -0,0 +1,9 @@
+extern callecho(a: ptr) -> ptr {
+  local: print(string) -> void
+  pure: false
+}
+
+ extern splitwrap(a: cstring, cstring) -> ptr {
+  local: split(x: string, y: string) -> list[string]
+  pure: true
+}
diff --git a/tests/extern_syntax.c4m b/tests/extern_syntax.c4m
new file mode 100644
index 00000000..6194caf3
--- /dev/null
+++ b/tests/extern_syntax.c4m
@@ -0,0 +1,32 @@
+extern callecho(a: ptr) -> ptr {
+  local: print(x: string) -> void
+  pure: false
+}
+
+extern echoanint(a: cint) -> cvoid {
+  local: print(x: int) -> void
+  pure: false
+}
+
+extern exit(status: cint) -> cvoid {
+  local: exit(x: int) -> void
+  pure: false
+}
+
+extern abort() -> cvoid {
+  local: abort() -> void
+  pure: false
+}
+
+ extern splitwrap(a: cstring, cstring) -> ptr {
+  local: split(x: string, y: string) -> list[string]
+  pure: true
+}
+
+extern callecho2(a: ptr, b: ptr) -> ptr {
+  local: print(x: string) -> void
+  pure: false
+  holds: a
+  allocs: b, return
+}
+
diff --git a/tests/fib.c4m b/tests/fib.c4m
new file mode 100644
index 00000000..868b7064
--- /dev/null
+++ b/tests/fib.c4m
@@ -0,0 +1,9 @@
+func n(m) {
+  if m <= 1 {
+    return 1
+  }
+  
+  return n(m - 1) + n(m - 2)
+}
+
+assert n(18) == 4181
\ No newline at end of file
diff --git a/tests/labels.c4m b/tests/labels.c4m
new file mode 100644
index 00000000..b4903217
--- /dev/null
+++ b/tests/labels.c4m
@@ -0,0 +1,19 @@
+outer:
+  while true {
+    inner:
+    while true {
+      break outer
+    }
+  }
+
+outer:
+while true {
+  inner:
+  while true {
+    break outer
+  }
+}
+
+outer: while true {
+ break
+}
\ No newline at end of file
diff --git a/tests/list.c4m b/tests/list.c4m
new file mode 100644
index 00000000..b5639e69
--- /dev/null
+++ b/tests/list.c4m
@@ -0,0 +1,35 @@
+# Tests basic functionality of lists built into the language:
+# 1. copying on assignment
+# 2. container iteration
+# 3. indexing
+# 4. assignment to an index
+# 5. slicing
+# 6. assigning slices
+
+x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+y = x
+
+for item in y {
+  assert item == ($i + 1)
+  x[$i] = 10
+}
+
+for item in x {
+  assert item == 10
+  assert y[$i] == $i + 1
+}
+
+sum = 0
+
+for item in x[1:-1] {
+  sum += item
+}
+
+assert sum == 80
+x[1:-1] = [5]
+sum = 0
+
+for item in x {
+  sum += item
+}
+assert sum == 25
\ No newline at end of file
diff --git a/tests/lock.c4m b/tests/lock.c4m
new file mode 100644
index 00000000..bcefcb1e
--- /dev/null
+++ b/tests/lock.c4m
@@ -0,0 +1,3 @@
+~foo.bar = "hello"
+foo.bar = "goodbye"
+
diff --git a/tests/lock2.c4m b/tests/lock2.c4m
new file mode 100644
index 00000000..ed41f436
--- /dev/null
+++ b/tests/lock2.c4m
@@ -0,0 +1,6 @@
+foo.bar = "hello"
+assert foo.bar == "hello"
+~foo.bar
+foo.bar = "goodbye"
+assert foo.bar == "goodbye"
+
diff --git a/tests/modparam.c4m b/tests/modparam.c4m
new file mode 100644
index 00000000..b618ea18
--- /dev/null
+++ b/tests/modparam.c4m
@@ -0,0 +1,30 @@
+func value_provider() {
+  return 101
+}
+
+func example_checker(x) {
+  result = ""
+
+  if (x % 2) != 0 {
+    result = "Parameter value must be even."
+  }
+}
+
+parameter var example1 {
+  "This should be some documentation."
+  "Also this."
+  default:  100
+  validator: func example_checker(int) -> string
+}
+
+parameter var example2 {
+  "This should be some documentation."
+  "Also this."
+  initialize:  func value_provider() 
+  validator: func example_checker(int) -> string
+}
+
+# Neither of these should happen.
+print(example1)
+print(example2)
+assert false
\ No newline at end of file
diff --git a/tests/olitmod.c4m b/tests/olitmod.c4m
new file mode 100644
index 00000000..d3eed5c6
--- /dev/null
+++ b/tests/olitmod.c4m
@@ -0,0 +1,6 @@
+shouldwork = "2 gb"'sz
+
+var shouldnt: duration
+
+shouldnt = "2 gb"'sz
+
diff --git a/tests/olits.c4m b/tests/olits.c4m
new file mode 100644
index 00000000..95673864
--- /dev/null
+++ b/tests/olits.c4m
@@ -0,0 +1,2 @@
+way1 = "2 gb"'sz
+qwoi := 2 gb
diff --git a/tests/params.c4m b/tests/params.c4m
new file mode 100644
index 00000000..e6c27b2d
--- /dev/null
+++ b/tests/params.c4m
@@ -0,0 +1,39 @@
+# This will obviously infer `int`, but I eventually want to warn on
+# this situation, which is why I've written it this way.
+#
+# Also, currently, we don't accept the type removal here; we *could*
+# infer list | dict | tuple, but that complicates the code generation
+# a bit.
+#
+# But I'm not sure that's particularly necessary. Once there is enough
+# API, being able to leverage info about the calls made will generally
+# be able to help us make correct inferences.
+
+func l_test(n: list[`t]) {
+  n[2] = 100
+}
+
+func d_test(d: dict[`t, `v]) {
+  d[1] = 4
+}
+
+func t_test(t: tuple[`x, `y]) {
+  t[0] = 1
+}
+
+
+# Testing to make sure that containers have reference semantics.
+
+x = [1, 2, 3, 4, 5, 6, 7]
+
+l_test(x)
+
+assert x[2] == 100
+
+z = (2, 3)
+t_test(z)
+assert z[0] == 1
+
+y = { 1: 2, 3 : 4 }
+d_test(y)
+assert y[1] == 4
\ No newline at end of file
diff --git a/tests/resume1.c4m b/tests/resume1.c4m
new file mode 100644
index 00000000..83f56839
--- /dev/null
+++ b/tests/resume1.c4m
@@ -0,0 +1,17 @@
+confspec {
+  singleton example {
+    field intval {
+      type:    int
+      default: 0
+    
+    }
+  }
+  
+  root {
+    allow: example
+  }
+}
+
+print(example.intval)
+example.intval += 1
+print(example.intval)
\ No newline at end of file
diff --git a/tests/rich.c4m b/tests/rich.c4m
new file mode 100644
index 00000000..f44371fa
--- /dev/null
+++ b/tests/rich.c4m
@@ -0,0 +1,29 @@
+func crappylen(x: list[`t]) {
+  for i in x {
+    return $len
+  }
+}
+
+r = "Hello, world!"'h2
+
+r.print()
+
+n =  [["Name",      "Country",   "City"],
+      ["John"'em,     "USA",     "NYC"],
+      ["Brandon"'em,  "USA",     "NYC"],
+      ["Liming"'em,   "USA",     "NYC"],
+      ["Miroslav"'em, "USA",     "Lawn Guy Land"],
+      ["Rich"'em,     "USA",     "Providence, RI"],
+      ["Matt"'em,     "USA",     "Orange"],
+      ["Mark"'em,     "UK",      "Brighton"],
+      ["Hugo"'em,     "UK",      "Brighton"],
+      ["James"'em,    "UK",      "Brighton"],
+      ["Max"'em,      "UK",      "Brighton"],
+      ["Theo"'em,     "Greece",  "Athens"],
+      ["Thomas"'em,   "Greece",  "Athens"],
+      ["James II"'em, "Germany", "Berlin"]]
+
+assert crappylen(n) == len(n)
+
+print("Hello, world!"'h1 + "Here's " + "emphasis"'em + " for my table: ")
+print(n.table())
diff --git a/tests/richmarshal.c4m b/tests/richmarshal.c4m
new file mode 100644
index 00000000..3f218dd4
--- /dev/null
+++ b/tests/richmarshal.c4m
@@ -0,0 +1,3 @@
+r = "Hello, world!"'h2 + "Yay!"'h4
+
+r.print()
\ No newline at end of file
diff --git a/tests/sections.c4m b/tests/sections.c4m
new file mode 100644
index 00000000..9bba7620
--- /dev/null
+++ b/tests/sections.c4m
@@ -0,0 +1,24 @@
+## all of these should work
+hello {}
+hello world {}
+hello "world" {}
+hello {
+  world : 2
+}
+
+hello {
+  world {
+  
+  }
+}
+
+# These no longer work, as it is too easy to do this by accident.
+# But the error probably should be improved.
+
+hello world
+hello "world"
+
+# This should not work; should give a use-before-def.
+# But for now this is disabled.
+foo
+
diff --git a/tests/sigoverlap.c4m b/tests/sigoverlap.c4m
new file mode 100644
index 00000000..557abae1
--- /dev/null
+++ b/tests/sigoverlap.c4m
@@ -0,0 +1,24 @@
+func ex2(x: `t) {
+  typeof x {
+    case int, i32:
+       print("hi")
+    case dict[`t, int]:
+        print("Int value")
+    case dict[string, string]:
+       print("Word.")
+  }
+}
+
+func ex2(lmno: `t) {
+  typeof lmno {
+    case int, i32:
+       print("hi")
+    case dict[`t, int]:
+        print("Int value")
+    case dict[string, string]:
+       print("Word.")
+    else {
+       print("foo")
+    }
+  }
+}
diff --git a/tests/spec1.c4m b/tests/spec1.c4m
new file mode 100644
index 00000000..a962295b
--- /dev/null
+++ b/tests/spec1.c4m
@@ -0,0 +1,44 @@
+func somethingcool(x: int) {
+  return true
+}
+
+confspec {
+
+  singleton test {  
+    user_def_ok: true
+    #validator: func somethingcool(int) -> bool
+    
+    field audit_id {
+      type:   int
+      default: 176
+      range: 0, 100
+      #validator: func somethingcool(int) -> bool
+    }
+    
+  }
+  
+  named test2 {
+    field audit_location {
+      type:   string
+      default: "test 1"
+      #validator: func somethingcool(int) -> bool
+    }
+  }
+
+  root {
+    allow:   test
+    require: test2
+    
+    field log_level {
+     type:    string
+     require: true
+     default: "info"
+     choices: ["trace", "info", "warn", "error", "fatal"]
+    }
+  }
+}
+
+assert test.audit_id == 176
+assert log_level == "info"
+log_level = "warn"
+assert log_level == "warn"
\ No newline at end of file
diff --git a/tests/spec2.c4m b/tests/spec2.c4m
new file mode 100644
index 00000000..1fb80ea5
--- /dev/null
+++ b/tests/spec2.c4m
@@ -0,0 +1,38 @@
+confspec {
+
+  singleton test {  
+    user_def_ok: true
+
+    field max {
+      type: int
+      default: 0xffffffffffffffff
+      lock: true
+    }
+    
+    field audit_id {
+      type:   string
+      default: "foo"
+      range: "please", "fail"
+    }
+  }
+  
+  named test2 {
+    field audit_location {
+      type:   string
+      default: "test 1"
+    }
+  }
+
+  root {
+    allow: test
+    allow: test2
+    
+    field log_level {
+     type:    string
+     require: true
+     default: "info"
+     choices: ["trace", "info", "warn", "error", "fatal"]
+    }
+  }
+}
+
diff --git a/tests/str.c4m b/tests/str.c4m
new file mode 100644
index 00000000..772ab74a
--- /dev/null
+++ b/tests/str.c4m
@@ -0,0 +1,7 @@
+s = ["this", "is", "a", "test"]
+
+l = join(s, " ")
+assert join(s, " ") == "this is a test"
+assert upper(l) == "THIS IS A TEST"
+assert split(join(s, " "), " ") == s
+assert pad("foo", 10) == "foo       "
\ No newline at end of file
diff --git a/tests/tup.c4m b/tests/tup.c4m
new file mode 100644
index 00000000..4ff95886
--- /dev/null
+++ b/tests/tup.c4m
@@ -0,0 +1,25 @@
+x = (1, "foo", 3)
+#assert x[0] == 1
+#assert x[1] == "foo"
+#assert x[2] == 3
+
+y = x
+y[1] = "blah"
+
+assert x[0] == 1
+assert x[1] == "foo"
+assert x[2] == 3
+
+x[0] = 4
+
+assert y[0] == 1
+assert y[1] == "blah"
+assert y[2] == 3
+
+x[1] = y[1]
+
+(a, b, c) = x
+
+assert a == 4
+assert b == "blah"
+assert c == 3
\ No newline at end of file
diff --git a/tests/use1.c4m b/tests/use1.c4m
new file mode 100644
index 00000000..c24c93de
--- /dev/null
+++ b/tests/use1.c4m
@@ -0,0 +1,3 @@
+use use2 from "."
+
+assert(fact(4) == 24)
diff --git a/tests/use2.c4m b/tests/use2.c4m
new file mode 100644
index 00000000..7d87c631
--- /dev/null
+++ b/tests/use2.c4m
@@ -0,0 +1,7 @@
+func fact(x) {
+  if x < 2 {
+    return 1;
+  }
+
+  return x * fact(x - 1);
+}
\ No newline at end of file
diff --git a/tests/usemissing.c4m b/tests/usemissing.c4m
new file mode 100644
index 00000000..73cbc6bc
--- /dev/null
+++ b/tests/usemissing.c4m
@@ -0,0 +1 @@
+use missing
\ No newline at end of file
diff --git a/tests/valueof.c4m b/tests/valueof.c4m
new file mode 100644
index 00000000..1904b934
--- /dev/null
+++ b/tests/valueof.c4m
@@ -0,0 +1,14 @@
+func fib(x) {
+  switch x {
+    case 0:
+        return 0
+    case 1:
+        return 1
+    else:
+        return fib(x - 2) + fib(x - 1)
+  }
+}
+
+x = fib(10)
+assert x == 55
+assert repr(x) == "55"