Tsonnet #26 - Chain me maybe, part 2

#tsonnet #jsonnet #compiler

Welcome to the Tsonnet series!

If you're not following the series so far, you can check out how it all started in the first post of the series.

In the previous post, I added support for chained field access in Tsonnet:

Tsonnet #25 - Chain me maybe, part 1

Hercules Lemke Merscher ・ Oct 28

#tsonnet #jsonnet #compiler

Now it's time to make sure this functionality doesn't have any leaks and lock things down with proper testing.

Testing object interpretation: no reference leaks allowed

We need to guarantee that object interpretation isn't accidentally leaking references through self and $. These should be strictly scoped to the object they belong to, and they shouldn't stick around in the environment after we're done interpreting an object.

Let's add comprehensive tests to test/test_interpreter.ml:

open Alcotest
open Tsonnet__Ast
open Tsonnet__Interpreter
open Tsonnet__Syntax_sugar

module Env = Tsonnet__Env

let test_interpret_object_no_self () =
  let entries = [
    ObjectField ("x", Number (dummy_pos, Int 1));
    ObjectField ("y", Number (dummy_pos, Int 2));
  ] in
  match interpret_object Env.empty (dummy_pos, entries) with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    check (option reject) "Environment should not contain 'self'" None has_self
  | Error msg ->
    fail ("test_interpret_object_no_self failed: " ^ msg)

let test_interpret_object_no_dollar () =
  let entries = [
    ObjectField ("x", Number (dummy_pos, Int 1));
    ObjectField ("y", Number (dummy_pos, Int 2));
  ] in
  match interpret_object Env.empty (dummy_pos, entries) with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    check (option reject) "Environment should not contain '$'" None has_dollar
  | Error msg ->
    fail ("test_interpret_object_no_dollar failed: " ^ msg)

let test_interpret_object_empty_no_self () =
  match interpret_object Env.empty (dummy_pos, []) with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    check (option reject) "Empty object environment should not contain 'self'" None has_self
  | Error msg ->
    fail ("test_interpret_object_empty_no_self failed: " ^ msg)

let test_interpret_object_empty_no_dollar () =
  match interpret_object Env.empty (dummy_pos, []) with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    check (option reject) "Empty object environment should not contain '$'" None has_dollar
  | Error msg ->
    fail ("test_interpret_object_empty_no_dollar failed: " ^ msg)

let test_interpret_object_with_local_no_self () =
  let obj_entries = [
    ObjectExpr (Local (dummy_pos, [("a", Number (dummy_pos, Int 10))]));
    ObjectField ("x", Ident (dummy_pos, "a"));
  ] in
  match interpret_object Env.empty (dummy_pos, obj_entries) with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    check (option reject) "Object with local should not have 'self' in env" None has_self
  | Error msg ->
    fail ("test_interpret_object_with_local_no_self failed: " ^ msg)

let test_interpret_object_with_local_no_dollar () =
  let obj_entries = [
    ObjectExpr (Local (dummy_pos, [("a", Number (dummy_pos, Int 10))]));
    ObjectField ("x", Ident (dummy_pos, "a"));
  ] in
  match interpret_object Env.empty (dummy_pos, obj_entries) with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    check (option reject) "Object with local should not have '$' in env" None has_dollar
  | Error msg ->
    fail ("test_interpret_object_with_local_no_dollar failed: " ^ msg)

let test_interpret_object_preserves_toplevel_when_present () =
  let open Result in
  let run_test () =
    (* Create an outer object and get its $ reference from the environment *)
    let entries = [ObjectField ("x", Number (dummy_pos, Int 1))] in
    let* (outer_env, _) = interpret_object Env.empty (dummy_pos, entries) in

    (* The outer object's $ should not be in the environment (since it started from empty) *)
    let* () = match Env.find_opt "$" outer_env with
      | Some _ -> error "outer object unexpectedly has $"
      | None -> ok ()
    in

    (* Now create an environment with a top-level reference *)
    let* outer_id = Env.Id.generate () in
    let toplevel_ptr = ObjectPtr (outer_id, TopLevel) in
    let env_with_toplevel = Env.add_local "$" toplevel_ptr outer_env in

    (* Interpret an object that contains a nested inner object - this is where $ could be overridden *)
    let nested_obj = ParsedObject (dummy_pos, [ObjectField ("z", Number (dummy_pos, Int 3))]) in
    let inner_entries = [ObjectField ("y", nested_obj)] in
    let* (inner_env, _) = interpret_object env_with_toplevel (dummy_pos, inner_entries) in

    (* The inner object should preserve the $ reference *)
    match Env.find_opt "$" inner_env with
    | Some preserved_ref ->
      check bool "Inner object should preserve outer '$' reference" true (preserved_ref = toplevel_ptr);
      ok ()
    | None ->
      error "$ was not preserved"
  in
  match run_test () with
  | Ok () -> ()
  | Error msg -> fail ("test_interpret_object_preserves_toplevel_when_present: " ^ msg)

let () =
  run "Interpreter" [
    "interpret_object", [
      test_case "object with fields returns no 'self' in env" `Quick test_interpret_object_no_self;
      test_case "object with fields returns no '$' in env" `Quick test_interpret_object_no_dollar;
      test_case "empty object returns no 'self' in env" `Quick test_interpret_object_empty_no_self;
      test_case "empty object returns no '$' in env" `Quick test_interpret_object_empty_no_dollar;
      test_case "object with local returns no 'self' in env" `Quick test_interpret_object_with_local_no_self;
      test_case "object with local returns no '$' in env" `Quick test_interpret_object_with_local_no_dollar;
      test_case "nested object preserves outer '$' reference" `Quick test_interpret_object_preserves_toplevel_when_present;
    ];
  ]

These tests verify that self and $ don't leak into the environment after object interpretation completes. They also check that nested objects preserve the top-level $ reference properly.

And guess what? The tests immediately caught a bug:

diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index 227007e..fe7f1a7 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -106,6 +106,7 @@ and interpret_array env (pos, exprs) =

 and interpret_object env (pos, entries) =
   let* obj_id = Env.Id.generate () in
+  let had_toplevel = Option.is_some (Env.find_opt "$" env) in
   let self_expr = ObjectPtr (obj_id, Self) in
   let env' = Env.add_local "self" self_expr env in
   let env', toplevel_expr = Env.add_local_when_not_present "$" (ObjectPtr (obj_id, TopLevel)) env' in
@@ -147,7 +148,7 @@ and interpret_object env (pos, entries) =
   (* Remove self and $ from the resulting environment.
      Posterior interpretations shouldn't have references to them. *)
   let env' = Env.Map.remove "self" env' in
-  let env' = Env.Map.remove "$" env' in
+  let env' = if had_toplevel then env' else Env.Map.remove "$" env' in

   ok (env', RuntimeObject (pos, obj_id, fields))

The interpret_object function wasn't considering whether the object was the top-level or not. We can't just blindly remove $ from the environment if we're interpreting an object deep within the structure -- all child objects must have access to the top-level reference.

The fix is straightforward: before we start adding things to the environment, we check if $ is already present. If it is, we preserve it; if not, we can safely remove it after interpretation.

The type checker needs the same treatment

The same issue exists in the type checking phase. We need to be careful not to spread bugs around. Let's add test/test_type.ml:

open Alcotest
open Tsonnet__Ast
open Tsonnet__Type
open Tsonnet__Syntax_sugar

module Env = Tsonnet__Env

let test_translate_object_no_self () =
  let entries = [
    ObjectField ("x", Number (dummy_pos, Int 1));
    ObjectField ("y", Number (dummy_pos, Int 2));
  ] in
  match translate_object Env.empty dummy_pos entries with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    Alcotest.(check (option reject)) "Environment should not contain 'self'" None has_self
  | Error msg ->
    Alcotest.fail ("test_translate_object_no_self failed: " ^ msg)

let test_translate_object_no_dollar () =
  let entries = [
    ObjectField ("x", Number (dummy_pos, Int 1));
    ObjectField ("y", Number (dummy_pos, Int 2));
  ] in
  match translate_object Env.empty dummy_pos entries with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    Alcotest.(check (option reject)) "Environment should not contain '$'" None has_dollar
  | Error msg ->
    Alcotest.fail ("test_translate_object_no_dollar failed: " ^ msg)

let test_translate_object_empty_no_self () =
  match translate_object Env.empty dummy_pos [] with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    Alcotest.(check (option reject)) "Empty object environment should not contain 'self'" None has_self
  | Error msg ->
    Alcotest.fail ("test_translate_object_empty_no_self failed: " ^ msg)

let test_translate_object_empty_no_dollar () =
  match translate_object Env.empty dummy_pos [] with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    Alcotest.(check (option reject)) "Empty object environment should not contain '$'" None has_dollar
  | Error msg ->
    Alcotest.fail ("test_translate_object_empty_no_dollar failed: " ^ msg)

let test_translate_object_with_local_no_self () =
  let obj_entries = [
    ObjectExpr (Local (dummy_pos, [("a", Number (dummy_pos, Int 10))]));
    ObjectField ("x", Ident (dummy_pos, "a"));
  ] in
  match translate_object Env.empty dummy_pos obj_entries with
  | Ok (env, _) ->
    let has_self = Env.find_opt "self" env in
    Alcotest.(check (option reject)) "Object with local should not have 'self' in env" None has_self
  | Error msg ->
    Alcotest.fail ("test_translate_object_with_local_no_self failed: " ^ msg)

let test_translate_object_with_local_no_dollar () =
  let obj_entries = [
    ObjectExpr (Local (dummy_pos, [("a", Number (dummy_pos, Int 10))]));
    ObjectField ("x", Ident (dummy_pos, "a"));
  ] in
  match translate_object Env.empty dummy_pos obj_entries with
  | Ok (env, _) ->
    let has_dollar = Env.find_opt "$" env in
    Alcotest.(check (option reject)) "Object with local should not have '$' in env" None has_dollar
  | Error msg ->
    Alcotest.fail ("test_translate_object_with_local_no_dollar failed: " ^ msg)

let test_translate_object_preserves_toplevel_when_present () =
  let open Result in
  let run_test () =
    (* Create an outer object and get its $ reference from the environment *)
    let entries = [ObjectField ("x", Number (dummy_pos, Int 1))] in
    let* (outer_env, _) = translate_object Env.empty dummy_pos entries in

    (* The outer object's $ should not be in the environment (since it started from empty) *)
    let* () = match Env.find_opt "$" outer_env with
      | Some _ -> error "outer object unexpectedly has $"
      | None -> ok ()
    in

    (* Now create an environment with a top-level reference *)
    let* outer_id = Env.Id.generate () in
    let toplevel_ptr = TobjectPtr (outer_id, TobjectTopLevel) in
    let env_with_toplevel = Env.add_local "$" toplevel_ptr outer_env in

    (* Interpret an object that contains a nested inner object - this is where $ could be overridden *)
    let nested_obj = ParsedObject (dummy_pos, [ObjectField ("z", Number (dummy_pos, Int 3))]) in
    let inner_entries = [ObjectField ("y", nested_obj)] in
    let* (inner_env, _) = translate_object env_with_toplevel dummy_pos inner_entries in

    (* The inner object should preserve the $ reference *)
    match Env.find_opt "$" inner_env with
    | Some preserved_ref ->
      Alcotest.(check bool) "Inner object should preserve outer '$' reference" true (preserved_ref = toplevel_ptr);
      ok ()
    | None ->
      error "$ was not preserved"
  in
  match run_test () with
  | Ok () -> ()
  | Error msg -> Alcotest.fail ("test_translate_object_preserves_toplevel_when_present: " ^ msg)

let () =
  run "Type" [
    "translate_object", [
      test_case "object with fields returns no 'self' in env" `Quick test_translate_object_no_self;
      test_case "object with fields returns no '$' in env" `Quick test_translate_object_no_dollar;
      test_case "empty object returns no 'self' in env" `Quick test_translate_object_empty_no_self;
      test_case "empty object returns no '$' in env" `Quick test_translate_object_empty_no_dollar;
      test_case "object with local returns no 'self' in env" `Quick test_translate_object_with_local_no_self;
      test_case "object with local returns no '$' in env" `Quick test_translate_object_with_local_no_dollar;
      test_case "nested object preserves outer '$' reference" `Quick test_translate_object_preserves_toplevel_when_present;
    ];
  ]

And look what we found:

diff --git a/lib/type.ml b/lib/type.ml
index 7ecdb85..99f11cc 100644
--- a/lib/type.ml
+++ b/lib/type.ml
@@ -210,6 +210,7 @@ and translate_lazy venv = function

 and translate_object venv pos entries =
   let* obj_id = Env.Id.generate () in
+  let had_toplevel = Option.is_some (Env.find_opt "$" venv) in
   let venv = Env.add_local "self" (TobjectPtr (obj_id, TobjectSelf)) venv in
   let venv, _ =
     Env.add_local_when_not_present "$" (TobjectPtr (obj_id, TobjectTopLevel)) venv
@@ -257,6 +258,9 @@ and translate_object venv pos entries =
     (ok [])
     entries
   in
+  (* Remove self and $ from the environment to prevent leaking *)
+  let venv = Env.Map.remove "self" venv in
+  let venv = if had_toplevel then venv else Env.Map.remove "$" venv in
   ok (venv, TruntimeObject (obj_id, entry_types))

 and translate_object_field_access venv pos scope chain_exprs =

The type checker was suffering from the exact same problem as the interpreter. I'm glad we have tests now to catch these cases.

Testing the environment helper

Let's also add new tests in test/test_env.ml -- just to be sure the add_local_when_not_present function works correctly:

let test_add_local_when_not_present_adds_new_variable () =
  let new_expr = Number (dummy_pos, Int 42) in
  let (env, returned_expr) = Env.add_local_when_not_present "$" new_expr Env.empty in
  let found_expr = Env.find_opt "$" env in
  Alcotest.(check bool) "adds new variable to environment" true (found_expr = Some new_expr);
  Alcotest.(check bool) "returns the new expression" true (returned_expr = new_expr)

let test_add_local_when_not_present_preserves_existing_variable () =
  let existing_expr = Number (dummy_pos, Int 1) in
  let new_expr = Number (dummy_pos, Int 2) in
  let env_with_existing = Env.add_local "$" existing_expr Env.empty in
  let (env, returned_expr) = Env.add_local_when_not_present "$" new_expr env_with_existing in
  let found_expr = Env.find_opt "$" env in
  Alcotest.(check bool) "preserves existing variable in environment" true (found_expr = Some existing_expr);
  Alcotest.(check bool) "returns the existing expression, not the new one" true (returned_expr = existing_expr)

diff --git a/test/test_env.ml b/test/test_env.ml
index bdff9ef..7b28efc 100644
--- a/test/test_env.ml
+++ b/test/test_env.ml
@@ -102,4 +118,8 @@ let () =
       test_case "Undefined variable" `Quick test_undefined_variable;
       QCheck_alcotest.to_alcotest test_lookup;
     ];
+    "add_local_when_not_present", [
+      test_case "adds new variable when not present" `Quick test_add_local_when_not_present_adds_new_variable;
+      test_case "preserves existing variable when present" `Quick test_add_local_when_not_present_preserves_existing_variable;
+    ];
   ]

Configuring the test suite

We bind everything together by configuring test/dune to include the new test files:

diff --git a/test/dune b/test/dune
index 37c860c..933f549 100644
--- a/test/dune
+++ b/test/dune
@@ -1,3 +1,3 @@
 (tests
- (names test_env)
+ (names test_env test_interpreter test_type)
  (libraries tsonnet alcotest qcheck-core qcheck-alcotest))

Maybe there's a way to configure this through a pattern, but we only have three test files so far. I prefer to keep it simple if it doesn't hurt!

Conclusion

With these tests in place, I can sleep tight knowing that self and $ references are properly scoped and don't leak into the environment where they shouldn't be. The test suite caught subtle bugs that could have caused headaches later, proving once again that investing time in good tests pays off.

The entire diff can be seen here.

Testing isn't the most exciting part of compiler development, but it's absolutely essential. These tests give me confidence that the chained field access feature from the previous post is solid and won't silently break as the codebase evolves.

Thanks for reading Bit Maybe Wise! Subscribe to catch more compiler bugs before they catch you.

Photo by Joe Zlomek on Unsplash