Hercules Lemke Merscher

Posted on Dec 13, 2025 • Originally published at bitmaybewise.substack.com

Tsonnet #29 - Making inner references work

#tsonnet #jsonnet #compiler

Welcome to the Tsonnet series!

If you're not following along, check out how it all started in the first post of the series.

In the previous post, I added ppx_deriving.show to help debug the AST and centralized configuration:

Tsonnet #28 - Debugging gets pretty (printed)

Hercules Lemke Merscher ・ Dec 11

#jsonnet #tsonnet #compiler

Time to tackle another piece of the Jsonnet tutorial: inner references. This is the second part of working with object references, and it gets... interesting.

What we're building

Here's the sample file we need to handle:

// samples/tutorials/inner-reference.jsonnet
{
  Martini: {
    local drink = self,
    ingredients: [
      { kind: "Farmer's Gin", qty: 1 },
      {
        kind: 'Dry White Vermouth',
        qty: drink.ingredients[0].qty,
      },
    ],
    garnish: 'Olive',
    served: 'Straight Up',
  },
}

The tricky bit? That local drink = self inside the Martini object. The variable drink holds a reference to the object itself, and then we use it to access drink.ingredients[0].qty. This is self-reference through an intermediate variable.

The expected output (as a cram test):

diff --git a/test/cram/tutorials.t b/test/cram/tutorials.t
index b9c1f6f..9fe544c 100644
--- a/test/cram/tutorials.t
+++ b/test/cram/tutorials.t
@@ -79,3 +79,15 @@
       "served": "Tall"
     }
   }
+
+  $ tsonnet ../../samples/tutorials/inner-reference.jsonnet
+  {
+    "Martini": {
+      "garnish": "Olive",
+      "ingredients": [
+        { "kind": "Farmer's Gin", "qty": 1 },
+        { "kind": "Dry White Vermouth", "qty": 1 }
+      ],
+      "served": "Straight Up"
+    }
+  }

Extending the AST with variable references

We already have Self and TopLevel for object scopes. Now we need a way to reference objects through variables:

@@ -76,6 +76,7 @@ and object_entry =
 and object_scope =
   | Self
   | TopLevel
+  | ObjVarRef of string
 [@@deriving show]

 let dummy_expr = Unit

Objects also need their own environment. Before, we were storing just an env_id in RuntimeObject, but now we need the full environment to properly resolve these variable references:

@@ -62,7 +62,7 @@ type expr =
   | Ident of position * string
   | Array of position * expr list
   | ParsedObject of position * object_entry list
-  | RuntimeObject of position * (Env.env_id [@opaque]) * ObjectFields.t
+  | RuntimeObject of position * (expr Env.Map.t [@opaque]) * ObjectFields.t
   | ObjectPtr of (Env.env_id [@opaque]) * object_scope
   | ObjectFieldAccess of position * object_scope * expr list
   | BinOp of position * bin_op * expr * expr
@@ -109,9 +111,8 @@ let rec string_of_type = function
   | ParsedObject (_, fields) ->
     Printf.sprintf "PlainObject{%s}"
       (String.concat ", " (List.map string_of_object_entry fields))
-  | RuntimeObject (_, (Env.EnvId id), fields) ->
-    Printf.sprintf "obj<%d>{%s}" id
-      (String.concat ", " (ObjectFields.to_list fields))
+  | RuntimeObject (_, _env, fields) ->
+    Printf.sprintf "obj{%s}" (String.concat ", " (ObjectFields.to_list fields))
   | BinOp (_, bin_op, _, _) ->
     let prefix = "Binary Operation" in
     let bin_op = match bin_op with

Wrestling with the parser

The parser changes are where things get hairy. We need to handle three cases:

self.field or $.field (with optional chains)
drink.field (variable reference requiring at least one field access)
drink[expr].field or drink[expr][expr] (variable reference starting with bracket notation, followed by at least one more access)

The second case is crucial for interpreting the inner reference sample file.

Here's the updated grammar:

diff --git a/lib/parser.mly b/lib/parser.mly
index 9d4ca52..6a5b704 100644
--- a/lib/parser.mly
+++ b/lib/parser.mly
@@ -103,38 +103,38 @@ obj_field_list:
   ;

 obj_field_expr:
-  | DOT; e = indexed_expr { e }
+  | DOT; LEFT_SQR_BRACKET; e = assignable_expr; RIGHT_SQR_BRACKET { e }
   | DOT; id = identifier { id }
   ;

+obj_field_chain_item:
+  | e = obj_field_expr { e }
+  | LEFT_SQR_BRACKET; e = assignable_expr; RIGHT_SQR_BRACKET { e }
+  ;
+
 obj_field_chain:
   | { [] }
-  | id = obj_field_expr; ids = obj_field_chain { id :: ids }
+  | id = obj_field_chain_item; ids = obj_field_chain { id :: ids }
+  ;
+
+obj_field_chain_nonempty:
+  | id = obj_field_chain_item { [id] }
+  | id = obj_field_chain_item; ids = obj_field_chain_nonempty { id :: ids }
   ;

 obj_scope:
   | SELF { Self }
   | TOP_LEVEL_OBJ { TopLevel }
+  | id = ID { ObjVarRef id }
   ;

 obj_field_access:
-  | scope = obj_scope; chain = obj_field_chain { ObjectFieldAccess (with_pos $startpos $endpos, scope, chain) }
-  (* The first bracketed expr when accessing an object field
-     must be explicitly declared here, instead of being part
-     of `object_field_expr`.
-
-     Adding the bracketed expr there will make the grammar unclear
-     since Menhir will need to decide between parsing one of the options:
-     1) .identifier
-     2) .identifier[expr]
-
-     By tying to the scope, such as $[expr], the grammar is now clear
-     and Menhir doesn't need to decide on its own.
-  *)
-  | scope = obj_scope;
-    LEFT_SQR_BRACKET; e = assignable_expr; RIGHT_SQR_BRACKET;
-    chain = obj_field_chain
-    { ObjectFieldAccess (with_pos $startpos $endpos, scope, e :: chain) }
+  (* For self and $, allow empty chain *)
+  | SELF; chain = obj_field_chain { ObjectFieldAccess (with_pos $startpos $endpos, Self, chain) }
+  | TOP_LEVEL_OBJ; chain = obj_field_chain { ObjectFieldAccess (with_pos $startpos $endpos, TopLevel, chain) }
+  (* For ID-based scope, only match if there's a dot (obj_field_expr) or a bracket access followed by at least one more chain item *)
+  | id = ID; field = obj_field_expr; chain = obj_field_chain { ObjectFieldAccess (with_pos $startpos $endpos, ObjVarRef id, field :: chain) }
+  | id = ID; LEFT_SQR_BRACKET; e = assignable_expr; RIGHT_SQR_BRACKET; rest = obj_field_chain_nonempty { ObjectFieldAccess (with_pos $startpos $endpos, ObjVarRef id, e :: rest) }
   ;

 %inline number:

The comments in the original code explain it well, but the key insight is: we need separate rules for self/$ (which can stand alone or have chains) versus variable references (which must have at least one field access to be meaningful).

If you're fuzzy on shift/reduce conflicts, I wrote about debugging them here.

Scope validation

The Scope module needs a small update to handle ObjVarRef:

diff --git a/lib/scope.ml b/lib/scope.ml
index 8846f39..ecbf17c 100644
--- a/lib/scope.ml
+++ b/lib/scope.ml
@@ -103,14 +103,19 @@ and validate_object_field_access pos scope context =
     local x = self.field;
     local x = $.field;
     outside of objects *)
-  if not context.in_object
-  then
-    let with_error_msg = match scope with
-                        | Self -> Error.Msg.self_out_of_scope
-                        | TopLevel -> Error.Msg.no_toplevel_object
-    in
-    Error.trace with_error_msg pos >>= error
-  else ok ()
+  match scope with
+  | Self | TopLevel ->
+    if not context.in_object then
+      let with_error_msg = match scope with
+        | Self -> Error.Msg.self_out_of_scope
+        | TopLevel -> Error.Msg.no_toplevel_object
+        | ObjVarRef _ -> "" (* unreachable *)
+      in
+      Error.trace with_error_msg pos >>= error
+    else ok ()
+  | ObjVarRef _ ->
+    (* Variable references are allowed anywhere *)
+    ok ()

 and validate_locals vars context =
   (* This is crucial - it catches: local x = self.field; outside objects *)

Variable references don't have the same scoping restrictions as self and $ -- they're just regular identifiers that happen to point to objects.

Before we proceed to the relevant part

The interpreter went through a non-trivial refactoring.

The interpret_concat_op moved below to be part of the recursive definition of interpret:

diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index 77b3ac2..ca33fd6 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -21,17 +21,6 @@ let interpret_arith_op (op: bin_op) (n1: number) (n2: number) =
   | Divide, (Int a), (Float b) -> Float ((float_of_int a) /. b)
   | Divide, (Float a), (Float b) -> Float (a /. b)

-let interpret_concat_op env (e1 : expr) (e2 : expr) : (expr, string) result =
-  match e1, e2 with
-  | String (_, s1), String (_, s2) ->
-    ok (String (dummy_pos, s1^s2))
-  | String (_, s1), val2 ->
-    let* s2 = Json.expr_to_string (env, val2) in ok (String (dummy_pos, s1^s2))
-  | val1, String (_, s2) ->
-    let* s1 = Json.expr_to_string (env, val1) in ok (String (dummy_pos, s1^s2))
-  | _ ->
-    error Error.Msg.interp_invalid_concat
-

The same has been done with interpret_local and interpret_seq -- it's cleaner to read now:

 let interpret_unary_op (op: unary_op) (evaluated_expr: expr) =
   match op, evaluated_expr with
   | Plus, number -> ok number
@@ -48,6 +37,7 @@ let rec interpret env expr =
   | Array (pos, exprs) -> interpret_array env (pos, exprs)
   | ParsedObject (pos, entries) -> interpret_object env (pos, entries)
   | RuntimeObject _ as runtime_obj -> ok (env, runtime_obj)
+  | ObjectPtr _ as obj_ptr -> ok (env, obj_ptr)
   | ObjectFieldAccess (pos, scope, chain) -> interpret_object_field_access env (pos, scope, chain)
   | Ident (pos, varname) ->
     Env.find_var varname env
@@ -70,16 +60,9 @@ let rec interpret env expr =
     Result.fold (interpret_unary_op op expr')
       ~ok:(fun expr' -> ok (env', expr'))
       ~error:(Error.error_at pos)
-  | Local (_, vars) ->
-    let acc_fun env (varname, expr) = Env.add_local varname expr env in
-    let env' = List.fold_left acc_fun env vars
-    in ok (env', Unit)
+  | Local (_, vars) -> interpret_local env vars
   | Unit -> ok (env, Unit)
-  | Seq exprs ->
-    (match exprs with
-    | [] -> ok (env, Unit)
-    | [expr] -> interpret env expr
-    | (expr :: exprs) -> interpret env expr >>= fun (env', _) -> interpret env' (Seq exprs))
+  | Seq exprs -> interpret_seq env exprs
   | IndexedExpr (pos, varname, index_expr) ->
     let* (env', index_expr') = interpret env index_expr in
     Env.find_var varname env'
@@ -90,8 +73,17 @@ let rec interpret env expr =
           ~error:(Error.error_at pos)
       )
       ~err:(Error.error_at pos)
-    | expr ->
-      error (Error.Msg.interp_cannot_interpret (string_of_type expr))

The only difference in interpret_concat_op is the new eval parameter passed to Json.expr_to_string -- we'll come to it in a bit:

+
+and interpret_concat_op env (e1 : expr) (e2 : expr) : (expr, string) result =
+    match e1, e2 with
+    | String (_, s1), String (_, s2) ->
+      ok (String (dummy_pos, s1^s2))
+    | String (_, s1), val2 ->
+      let* s2 = Json.expr_to_string ~eval:interpret (env, val2) in ok (String (dummy_pos, s1^s2))
+    | val1, String (_, s2) ->
+      let* s1 = Json.expr_to_string ~eval:interpret (env, val1) in ok (String (dummy_pos, s1^s2))
+    | _ ->
+      error Error.Msg.interp_invalid_concat

 and interpret_array env (pos, exprs) =
   let* (env', evaluated_exprs) = List.fold_left
@@ -104,14 +96,44 @@ and interpret_array env (pos, exprs) =
     exprs
   in ok (env', Array (pos, evaluated_exprs))

And interpret_seq did not change a thing, it's just wrapped in its own function now:

+and interpret_seq env exprs =
+  match exprs with
+  | [] -> ok (env, Unit)
+  | [expr] -> interpret env expr
+  | (expr :: exprs') ->
+    interpret env expr >>= fun (env', _) ->
+    interpret env' (Seq exprs')
+

Lazy evaluation arrives

This is where the interpreter changes get substantial. Up until now, we were eagerly evaluating in multiple parts of the code. But with inner references, we need proper lazy evaluation to avoid infinite loops.

Consider this pathological case:

{
  local drink = self,
  x: drink.x
}

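If we evaluate drink.x eagerly while the object is still being built, we'd recurse infinitely. The solution is to only evaluate object fields when they're actually accessed. (A field that genuinely depends on itself, like x here, still can't be resolved once it is finally accessed -- laziness just guarantees that well-founded references, such as drink.ingredients[0].qty in the sample file, can be.)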

Here's the new interpret_local:

and interpret_local env vars =
  let* env' =
    List.fold_left
      (fun acc (varname, expr) ->
        let* env = acc in
        match expr with
        | ObjectFieldAccess (_, (Self | TopLevel), []) ->
          (* Eagerly evaluate unchained self/$ references to capture the current object.
            AST example:
            (Ast.Local (3:3, [("drink", (Ast.ObjectFieldAccess (3:3, Ast.Self, [])))])));
          *)
          let* (env', evaluated_expr) = interpret env expr in
          ok (Env.add_local varname evaluated_expr env')
        | _ ->
          (* Other expressions remain lazy *)
          ok (Env.add_local varname expr env)
      )
      (ok env)
      vars
  in ok (env', Unit)

The exception is self and $ with no field chain -- we need to evaluate those immediately to capture the concrete object ID. For everything else, we store the unevaluated expression.

I'm pretty sure there are some edge cases hiding in this part that will surface as we progress in the implementation, but this is enough to make the current tests pass -- baby steps is my mantra.

Simplifying object interpretation

The interpret_object function actually got simpler:

 and interpret_object env (pos, entries) =
   let* obj_id = Env.Id.generate () in
-  let had_toplevel = Option.is_some (Env.find_opt "$" env) in
-  let self_expr = ObjectPtr (obj_id, Self) in
-  let env' = Env.add_local "self" self_expr env in
-  let env', toplevel_expr = Env.add_local_when_not_present "$" (ObjectPtr (obj_id, TopLevel)) env' in
+  let obj_env = Env.add_local "self" (ObjectPtr (obj_id, Self)) env in
+  let obj_env, _ = Env.add_local_when_not_present "$" (ObjectPtr (obj_id, TopLevel)) obj_env in
+
   (* First add locals and object fields to env *)
-  let* (env', fields) = List.fold_left
+  let* (obj_env, fields) = List.fold_left
     (fun result entry ->
       let* (env', fields) = result in
       match entry with
@@ -120,80 +142,87 @@ and interpret_object env (pos, entries) =
           it will add the expr to the environment *)
         let* (env', _) = interpret env' expr in ok (env', fields)
       | ObjectField (name, expr) ->
+        (* Object fields are kept lazy -- they will be evaluated only when accessed.
+           This prevents infinite loops from circular references. *)
         let env' = Env.add_obj_field name expr obj_id env' in
         ok (env', ObjectFields.add name fields)
     )
-    (ok (env', ObjectFields.empty))
+    (ok (obj_env, ObjectFields.empty))
     entries
   in
-  (* Then interpret object fields after env is populated *)
-  let* env' = ObjectFields.fold
-    (fun field acc ->
-      let* env' = acc in
-      let* (env', _expr) =
-        Env.get_obj_field field obj_id env'
-          ~succ:(interpret)
-          ~err:(Error.error_at pos)
-      in
-      (* self is removed by object evaluation, for this reason
-         we re-add self and $ to env' on each iteration here *)
-      let env' = Env.add_local "self" self_expr env' in
-      let env' = Env.add_local "$" toplevel_expr env' in
-      ok env'
-    )
-    fields
-    (ok env')
-  in
-
-  (* Remove self and $ from the resulting environment.
-     Posterior interpretations shouldn't have references to them. *)
-  let env' = Env.Map.remove "self" env' in
-  let env' = if had_toplevel then env' else Env.Map.remove "$" env' in
-
-  ok (env', RuntimeObject (pos, obj_id, fields))
+  (* We return env unchanged. RuntimeObject holds its own scoped env. *)
+  ok (env, RuntimeObject (pos, obj_env, fields))

Instead of evaluating all fields immediately and carefully removing self and $ references afterward, we just keep the object's environment inside RuntimeObject. Much cleaner!

The complexity moved to interpret_object_field_access, which now handles the actual evaluation when fields are accessed.

The gnarly field access logic

This function got... lengthy. The core challenge is handling three different types of scopes:

 and interpret_object_field_access env (pos, scope, chain_exprs) =
-  let* obj =
-    match Env.find_opt (string_of_object_scope scope) env with
-    | Some (ObjectPtr _ as obj) -> ok obj
-    | _ ->
-      Error.error_at pos
-        (match scope with
-        | Self -> Error.Msg.self_out_of_scope
-        | TopLevel -> Error.Msg.no_toplevel_object)
+  let* (env', obj) =
+    (* Special case: if this is just `self` or `$` with no field chain,
+     return the ObjectPtr directly. This ensures that when stored in variables,
+     they capture the concrete object ID, not a dynamic scope reference. *)
+    match scope with
+    | Self | TopLevel ->
+      (* For self and $, look them up as scopes in the environment *)
+      (match Env.find_opt (string_of_object_scope scope) env with
+      | Some (ObjectPtr _ as obj) -> ok (env, obj)
+      | Some (RuntimeObject _ as obj) -> ok (env, obj)
+      | _ ->
+        Error.error_at pos
+          (match scope with
+          | Self -> Error.Msg.self_out_of_scope
+          | TopLevel -> Error.Msg.no_toplevel_object
+          | ObjVarRef _ -> Error.Msg.var_not_found "" (* unreachable -- should never happen, TODO: make this unrepresentable *)
+          )
+      )
+    | ObjVarRef varname ->
+      (* For variable references, look up and evaluate the variable *)
+      let* (env', expr) =
+        Env.find_var varname env ~succ:(interpret) ~err:(Error.error_at pos)
+      in
+      match expr with
+      | ObjectPtr (obj_id, (Self | TopLevel)) ->
+        (* If the variable holds a Self/TopLevel reference, the obj_id
+           already captures which object it refers to. We don't need to
+           re-resolve Self/TopLevel in the current environment. *)
+        ok (env', ObjectPtr (obj_id, ObjVarRef varname))
+      | ObjectPtr _ as obj -> ok (env', obj)
+      | RuntimeObject _ as obj -> ok (env', obj)
+      | _ -> Error.error_at pos Error.Msg.must_be_object
   in
+

The tricky part is when a variable holds a Self or TopLevel reference -- we need to preserve the object ID that was captured when the variable was bound, not re-resolve it in the current environment.

Processing the chain involves lazily evaluating fields:

   List.fold_left
     (fun acc field_expr ->
       let* (env', prev_expr) = acc in
       let get_obj_id =
         match prev_expr with
-        | ObjectPtr (obj_id, _) -> ok obj_id
-        | RuntimeObject (_, obj_id, _) -> ok obj_id
+        | ObjectPtr (obj_id, _) ->
+          (* Temporarily add self and $ to env for lazy field evaluation *)
+          let field_env = Env.add_local "self" (ObjectPtr (obj_id, Self)) env' in
+          let field_env = Env.add_local_when_not_present "$" (ObjectPtr (obj_id, TopLevel)) field_env |> fst in
+          ok (obj_id, field_env)
+        | RuntimeObject (_, obj_env, _) ->
+          (match Env.find_opt "self" obj_env with
+          | Some (ObjectPtr (obj_id, _)) -> ok (obj_id, obj_env)
+          | _ -> error Error.Msg.must_be_object
+          )
         | _ -> Error.error_at pos Error.Msg.must_be_object
       in

       match field_expr with
       | String (pos, field) | Ident (pos, field) ->
-        let* obj_id = get_obj_id in
-        Env.get_obj_field field obj_id env'
+        let* (obj_id, field_env) = get_obj_id in
+        Env.get_obj_field field obj_id field_env
           ~succ:(interpret)
           ~err:(Error.error_at pos)
-      | IndexedExpr (pos, field, index_expr) ->
-        let* obj_id = get_obj_id in
-        let* (env', index_expr') = interpret env' index_expr in
-        let* (env', indexable_expr) =
-          Env.get_obj_field field obj_id env'
-            ~succ:(interpret)
-            ~err:(Error.error_at pos)
-        in
-          Result.fold
-            (Indexable.get index_expr' indexable_expr)
-            ~ok:(fun e -> interpret env' e)
-            ~error:(Error.error_at pos)
+      | Number _ as index_expr ->
+        (* Handle array/string indexing: prev_expr[number] *)
+        Result.fold
+          (Indexable.get index_expr prev_expr)
+          ~ok:(fun e -> ok (env', e))
+          ~error:(Error.error_at pos)
       | _e ->
         Error.error_at pos Error.Msg.interp_invalid_lookup
     )
-    (ok (env, obj))
+    (ok (env', obj))
     chain_exprs

 let eval expr = interpret Env.empty expr

This code needs to be simplified to avoid unreachable cases, such as the ObjVarRef branch inside the Self/TopLevel error handling. We could leverage the type system to make that state unrepresentable, but this would require a big refactoring. For the time being, I accept the unreachable branches -- with comments explaining why they exist -- as a trade-off.

I also had to change how the bracketed expressions were being interpreted. This part got simpler and became more readable.

Breaking circular dependencies

The Json module now needs to actually interpret expressions, which creates a circular dependency. The quick fix is to pass the interpreter function as a parameter:

diff --git a/lib/json.ml b/lib/json.ml
index 57a3be6..c8e2159 100644
--- a/lib/json.ml
+++ b/lib/json.ml
@@ -2,7 +2,15 @@ open Ast
 open Result
 open Syntax_sugar

-let rec value_to_yojson (env : expr Env.Map.t) (expr : Ast.expr) : (Yojson.t, string) result =
+(* The interpreter type allows us to break the circular dependency
+   between Json and Interpreter modules.
+   Ideally, the Json module does not need to know anything about the
+   previous step.
+   TODO: Json module functions should not receive unevaluated values.
+   Guarantee Interpreter generates a new and evaluated AST. *)
+type interpreter = expr Env.Map.t -> expr -> ((expr Env.Map.t * expr), string) result
+
+let rec value_to_yojson ~(eval: interpreter) (env : expr Env.Map.t) (expr : Ast.expr) : (Yojson.t, string) result =
   match expr with
   | Number (_, n) ->
     ok (match n with
@@ -12,22 +20,24 @@ let rec value_to_yojson (env : expr Env.Map.t) (expr : Ast.expr) : (Yojson.t, st
   | Bool (_, b) -> ok (`Bool b)
   | String (_, s) -> ok (`String s)
   | Array (_, values) ->
-    let expr_to_list expr' = to_list (value_to_yojson env expr') in
+    let expr_to_list expr' = to_list (value_to_yojson ~eval env expr') in
     let results = values |> List.map expr_to_list |> List.concat in
     ok (`List results)
-  | RuntimeObject (pos, context, fieldset) -> obj_to_yojson env (pos, context, fieldset)
-  | expr -> error ("value type not representable as JSON: " ^ string_of_type expr)
+  | RuntimeObject (pos, obj_env, fieldset) -> obj_to_yojson ~eval env (pos, obj_env, fieldset)
+  | expr -> error (Error.Msg.value_not_represetable_as_json (string_of_type expr))

-and obj_to_yojson env (pos, obj_id, fieldset) =
+and obj_to_yojson ~(eval: interpreter) _env (pos, obj_env, fieldset) =
   let* fields =
     ObjectFields.fold
       (fun field acc ->
-        let* (_, expr) =
-          Env.get_obj_field field obj_id env
-            ~succ:(fun _ expr -> ok (env, expr))
-            ~err:(Error.error_at pos)
+        let* obj_id = match Env.find_opt "self" obj_env with
+        | Some (Ast.ObjectPtr (obj_id, _)) -> ok obj_id
+        | _ -> error Error.Msg.must_be_object
+        in
+        let* (env', expr) =
+          Env.get_obj_field field obj_id obj_env ~succ:eval ~err:(Error.error_at pos)
         in
-        let* yo_value = value_to_yojson env expr in
+        let* yo_value = value_to_yojson ~eval env' expr in
         let* fields = acc in
         ok ((field, yo_value) :: fields)
       )
@@ -35,6 +45,6 @@ and obj_to_yojson env (pos, obj_id, fieldset) =
       (ok [])
   in ok (`Assoc (List.rev fields))

-let expr_to_string (env, expr) =
-  let yojson = value_to_yojson env expr
+let expr_to_string ~(eval: interpreter) (env, expr) =
+  let yojson = value_to_yojson ~eval env expr
   in Result.map Yojson.pretty_to_string yojson

diff --git a/lib/tsonnet.ml b/lib/tsonnet.ml
index d142c9e..6913295 100644
--- a/lib/tsonnet.ml
+++ b/lib/tsonnet.ml
@@ -23,4 +23,4 @@ let run (config : Config.t) (filename: string) : (string, string) result =
     >>= Ast.debug config
     >>= Type.check config
     >>= Interpreter.eval
-    >>= Json.expr_to_string
+    >>= Json.expr_to_string ~eval:Interpreter.interpret

This isn't ideal -- the Json module shouldn't need to know about interpretation. Ideally, we'd have separate AST types for each compiler phase, with the JSON serializer only receiving fully-evaluated expressions. But that's a big refactoring, and for now this works.

Type checker follows suit

The type checker needs similar changes to track variable references:

diff --git a/lib/type.ml b/lib/type.ml
index 390313f..6334c27 100644
--- a/lib/type.ml
+++ b/lib/type.ml
@@ -264,14 +264,30 @@ and translate_object venv pos entries =
   ok (venv, TruntimeObject (obj_id, entry_types))

 and translate_object_field_access venv pos scope chain_exprs =
-  let* obj =
-    match Env.find_opt (string_of_object_scope scope) venv with
-    | Some (TobjectPtr _ as obj) -> ok obj
-    | _ ->
-      Error.error_at pos
-        (match scope with
-        | Self -> Error.Msg.self_out_of_scope
-        | TopLevel -> Error.Msg.no_toplevel_object)
+  let* (venv, obj) =
+    match scope with
+    | Self | TopLevel ->
+      (* For self and $, look them up directly *)
+      (match Env.find_opt (string_of_object_scope scope) venv with
+      | Some (TobjectPtr _ as obj) -> ok (venv, obj)
+      | _ ->
+        Error.error_at pos
+          (match scope with
+          | Self -> Error.Msg.self_out_of_scope
+          | TopLevel -> Error.Msg.no_toplevel_object
+          | ObjVarRef _ -> "" (* unreachable *)
+          )
+      )
+    | ObjVarRef varname ->
+      (* For variable references, look up and translate the variable *)
+      Env.find_var varname venv
+        ~succ:(fun venv ty ->
+          match ty with
+          | TobjectPtr _ | TruntimeObject _ as obj -> ok (venv, obj)
+          | Lazy expr -> translate venv expr
+          | _ -> Error.error_at pos Error.Msg.must_be_object
+        )
+        ~err:(Error.error_at pos)
   in

   List.fold_left
@@ -291,23 +307,12 @@ and translate_object_field_access venv pos scope chain_exprs =
         Env.get_obj_field field obj_id venv
           ~succ:translate_lazy
           ~err:(Error.error_at pos)
-      | IndexedExpr (pos, field, index_expr) ->
-        let* (venv', index_expr_ty) = translate venv index_expr in
-        let* () =
-          match index_expr_ty with
-          | Tnumber | Tstring -> ok ()
-          | ty -> Error.error_at pos (Error.Msg.type_non_indexable_type (to_string ty))
-        in
-        let* obj_id = get_obj_id in
-        let* (venv', ty) =
-          Env.get_obj_field field obj_id venv'
-            ~succ:translate_lazy
-            ~err:(Error.error_at pos)
-        in
-        (match ty with
-        | (Tarray _) as array_ty -> ok (venv', array_ty)
-        | Tstring as ty -> ok (venv', ty)
-        | _ -> Error.error_at pos (Error.Msg.type_non_indexable_field field)
+      | Number (pos, _) ->
+        (* Handle numeric indexing of strings and arrays *)
+        (match prev_ty with
+        | Tstring -> ok (venv, Tstring)
+        | Tarray elem_ty -> ok (venv, elem_ty)
+        | _ -> Error.error_at pos (Error.Msg.type_non_indexable_type (to_string prev_ty))
         )
       | _ ->
         Error.error_at pos (Error.Msg.type_invalid_lookup_key (string_of_type field_expr))

The field chain processing also simplifies since we don't need the IndexedExpr case anymore -- bracket notation is now just numeric indexing.

Taming more error messages

Found a couple more error messages that needed centralization:

diff --git a/lib/error.ml b/lib/error.ml
index cdbbada..b518608 100644
--- a/lib/error.ml
+++ b/lib/error.ml
@@ -10,6 +10,7 @@ module Msg = struct
   (* Shared operation messages *)
   let self_out_of_scope = "Can't use self outside of an object"
   let no_toplevel_object = "No top-level object found"
+  let var_not_found varname = varname ^ " not found"
   let invalid_binary_op = "Invalid binary operation"
   let invalid_unary_op = "Invalid unary operation"
   let must_be_object = "Must be an object"
@@ -31,6 +32,9 @@ module Msg = struct
   let interp_invalid_concat = "Invalid string concatenation operation"
   let interp_invalid_lookup = "Invalid object lookup"
   let interp_cannot_interpret expr = Printf.sprintf "Expression %s cannot be interpreted" expr
+
+  (* Other messages *)
+  let value_not_represetable_as_json value = "value type not representable as JSON: " ^ value
 end

 let enumerate_error_lines filename position ~highlight_error =
diff --git a/lib/error.mli b/lib/error.mli
index 365af96..58d7e8e 100644
--- a/lib/error.mli
+++ b/lib/error.mli
@@ -5,6 +5,7 @@ module Msg : sig
   (* Scope-related messages *)
   val self_out_of_scope : string
   val no_toplevel_object : string
+  val var_not_found : string -> string

   (* Shared operation messages *)
   val invalid_binary_op : string
@@ -28,6 +29,9 @@ module Msg : sig
   val interp_invalid_concat : string
   val interp_invalid_lookup : string
   val interp_cannot_interpret : string -> string
+
+  (* Other messages *)
+  val value_not_represetable_as_json : string -> string
 end

 val trace : string -> Ast.position -> (string, string) result

And voilà

$ dune exec -- tsonnet samples/tutorials/inner-reference.jsonnet
{
  "Martini": {
    "garnish": "Olive",
    "ingredients": [
      { "kind": "Farmer's Gin", "qty": 1 },
      { "kind": "Dry White Vermouth", "qty": 1 }
    ],
    "served": "Straight Up"
  }
}

Conclusion

Inner references required introducing proper lazy evaluation for objects, which turned out to be a bigger change than I expected. The interpreter now properly handles self-references through variables, and objects carry their own environments.

There are definitely some rough edges -- the unreachable cases in pattern matches, the circular dependency between Json and Interpreter, the lengthy field access function. These are all candidates for future refactoring, but they're manageable trade-offs for now. The tests pass, and that's what matters.

The entire diff can be seen here.

Thanks for reading Bit Maybe Wise! Subscribe for more compiler complexity that's definitely not a circular dependency (it totally is).

Photo by Didssph on Unsplash

DEV Community