DEV Community

Cover image for Let’s Understand Chrome V8 — Chapter 9: Builtin
灰豆
灰豆

Posted on • Originally published at Medium

Let’s Understand Chrome V8 — Chapter 9: Builtin

Original source:https://medium.com/@huidou/lets-understand-chrome-v8-chapter-9-builtin-55a62836d594
Welcome to other chapters of Let’s Understand Chrome V8


Most of the functions in V8 are implemented as builtins. In this article, we’ll talk about the initialization of builtins and the common builtin types.

1. Initialization

Below is the Code class, which is responsible for managing all builtins.

1.  class Code : public HeapObject {
2.   public:
3.    NEVER_READ_ONLY_SPACE
4.    // Opaque data type for encapsulating code flags like kind, inline
5.    // cache state, and arguments count.
6.    using Flags = uint32_t;
7.  #define CODE_KIND_LIST(V)   \
8.      V(OPTIMIZED_FUNCTION)     \
9.      V(BYTECODE_HANDLER)       \
10.     V(STUB)                   \
11.     V(BUILTIN)                \
12.     V(REGEXP)                 \
13.     V(WASM_FUNCTION)          \
14.     V(WASM_TO_CAPI_FUNCTION)  \
15.     V(WASM_TO_JS_FUNCTION)    \
16.     V(JS_TO_WASM_FUNCTION)    \
17.     V(JS_TO_JS_FUNCTION)      \
18.     V(WASM_INTERPRETER_ENTRY) \
19.     V(C_WASM_ENTRY)
20.     enum Kind {
21.   #define DEFINE_CODE_KIND_ENUM(name) name,
22.       CODE_KIND_LIST(DEFINE_CODE_KIND_ENUM)
23.   #undef DEFINE_CODE_KIND_ENUM
24.           NUMBER_OF_KINDS
25.     };
26.     static const char* Kind2String(Kind kind);
27.     // Layout description.
28.   #define CODE_FIELDS(V)                                                    \
29.     V(kRelocationInfoOffset, kTaggedSize)                                   \
30.     V(kDeoptimizationDataOffset, kTaggedSize)                               \
31.     V(kSourcePositionTableOffset, kTaggedSize)                              \
32.     V(kCodeDataContainerOffset, kTaggedSize)                                \
33.     /* Data or code not directly visited by GC directly starts here. */     \
34.     /* The serializer needs to copy bytes starting from here verbatim. */   \
35.     /* Objects embedded into code is visited via reloc info. */             \
36.     V(kDataStart, 0)                                                        \
37.     V(kInstructionSizeOffset, kIntSize)                                     \
38.     V(kFlagsOffset, kIntSize)                                               \
39.     V(kSafepointTableOffsetOffset, kIntSize)                                \
40.     V(kHandlerTableOffsetOffset, kIntSize)                                  \
41.     V(kConstantPoolOffsetOffset,                                            \
42.       FLAG_enable_embedded_constant_pool ? kIntSize : 0)                    \
43.     V(kCodeCommentsOffsetOffset, kIntSize)                                  \
44.     V(kBuiltinIndexOffset, kIntSize)                                        \
45.     V(kUnalignedHeaderSize, 0)                                              \
46.     /* Add padding to align the instruction start following right after */  \
47.     /* the Code object header. */                                           \
48.     V(kOptionalPaddingOffset, CODE_POINTER_PADDING(kOptionalPaddingOffset)) \
49.     V(kHeaderSize, 0)
50.     DEFINE_FIELD_OFFSET_CONSTANTS(HeapObject::kHeaderSize, CODE_FIELDS)
51.  //omit...........................
52.   };
Enter fullscreen mode Exit fullscreen mode

Line 7 defines the kinds of code; the builtins themselves are defined in builtins-definitions.h. The roles of code and builtin are different, but both are initialized uniformly by void Isolate::Initialize(Isolate* isolate, const v8::Isolate::CreateParams& params). Below is the initialization code.

1.  void Isolate::Initialize(Isolate* isolate,
2.                           const v8::Isolate::CreateParams& params) {
3.    i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
4.    CHECK_NOT_NULL(params.array_buffer_allocator);
5.    i_isolate->set_array_buffer_allocator(params.array_buffer_allocator);
6.    if (params.snapshot_blob != nullptr) {
7.      i_isolate->set_snapshot_blob(params.snapshot_blob);
8.    } else {
9.      i_isolate->set_snapshot_blob(i::Snapshot::DefaultSnapshotBlob());
10.   }
11.    auto code_event_handler = params.code_event_handler;
12.  //.....................omit....................
13.    if (!i::Snapshot::Initialize(i_isolate)) {
14.      // If snapshot data was provided and we failed to deserialize it must
15.      // have been corrupted.
16.      if (i_isolate->snapshot_blob() != nullptr) {
17.        FATAL(
18.            "Failed to deserialize the V8 snapshot blob. This can mean that the "
19.            "snapshot blob file is corrupted or missing.");
20.      }
21.      base::ElapsedTimer timer;
22.      if (i::FLAG_profile_deserialization) timer.Start();
23.      i_isolate->InitWithoutSnapshot();
24.      if (i::FLAG_profile_deserialization) {
25.        double ms = timer.Elapsed().InMillisecondsF();
26.        i::PrintF("[Initializing isolate from scratch took %0.3f ms]\n", ms);
27.      }
28.    }
29.    i_isolate->set_only_terminate_in_safe_scope(
30.        params.only_terminate_in_safe_scope);
31.  }
Enter fullscreen mode Exit fullscreen mode

During initialization, line 23 (i_isolate->InitWithoutSnapshot()) is executed, which eventually calls the function below.

1.  void SetupIsolateDelegate::SetupBuiltinsInternal(Isolate* isolate) {
2.  //..................omit
3.  //..................omit
4.    int index = 0;
5.    Code code;
6.  #define BUILD_CPP(Name)                                                      \
7.    code = BuildAdaptor(isolate, index, FUNCTION_ADDR(Builtin_##Name), #Name); \
8.    AddBuiltin(builtins, index++, code);
9.  #define BUILD_TFJ(Name, Argc, ...)                              \
10.   code = BuildWithCodeStubAssemblerJS(                          \
11.        isolate, index, &Builtins::Generate_##Name, Argc, #Name); \
12.    AddBuiltin(builtins, index++, code);
13.  #define BUILD_TFC(Name, InterfaceDescriptor)                      \
14.    /* Return size is from the provided CallInterfaceDescriptor. */ \
15.    code = BuildWithCodeStubAssemblerCS(                            \
16.        isolate, index, &Builtins::Generate_##Name,                 \
17.        CallDescriptors::InterfaceDescriptor, #Name);               \
18.    AddBuiltin(builtins, index++, code);
19.  #define BUILD_TFS(Name, ...)                                                   \
20.    /* Return size for generic TF builtins (stub linkage) is always 1. */        \
21.    code =                                                                       \
22.        BuildWithCodeStubAssemblerCS(isolate, index, &Builtins::Generate_##Name, \
23.                                     CallDescriptors::Name, #Name);              \
24.    AddBuiltin(builtins, index++, code);
25.  #define BUILD_TFH(Name, InterfaceDescriptor)              \
26.    /* Return size for IC builtins/handlers is always 1. */ \
27.    code = BuildWithCodeStubAssemblerCS(                    \
28.        isolate, index, &Builtins::Generate_##Name,         \
29.        CallDescriptors::InterfaceDescriptor, #Name);       \
30.    AddBuiltin(builtins, index++, code);
31.  #define BUILD_BCH(Name, OperandScale, Bytecode)                           \
32.    code = GenerateBytecodeHandler(isolate, index, OperandScale, Bytecode); \
33.    AddBuiltin(builtins, index++, code);
34.  #define BUILD_ASM(Name, InterfaceDescriptor)                                \
35.    code = BuildWithMacroAssembler(isolate, index, Builtins::Generate_##Name, \
36.                                   #Name);                                    \
37.    AddBuiltin(builtins, index++, code);
38.    BUILTIN_LIST(BUILD_CPP, BUILD_TFJ, BUILD_TFC, BUILD_TFS, BUILD_TFH, BUILD_BCH,
39.                 BUILD_ASM);
40.  //omit...........................
41.  }
Enter fullscreen mode Exit fullscreen mode

The main task of initialization is to generate and compile Builtin code and mount it on the i::isolate. We explain BuildWithCodeStubAssemblerCS in detail.

The first parameter is the isolate, on which the generated builtin will be mounted; the second parameter is the builtin's index in the builtins array; the third parameter is a function pointer to the builtin's generator function; the fourth parameter is the call descriptor; the last parameter is the builtin's name.

1.  // Builder for builtins implemented in TurboFan with CallStub linkage.
    //
    // Parameters, as used in the body below:
    //   isolate              - passed to the scopes, the zone allocator lookup,
    //                          the assembler state and the assembler options.
    //   builtin_index        - forwarded to the assembler state and to
    //                          BuiltinAssemblerOptions().
    //   generator            - callback invoked with the assembler state.
    //   interface_descriptor - key used to construct the CallInterfaceDescriptor.
    //   name                 - forwarded to the assembler state.
    // Returns the Code obtained by dereferencing the handle produced by
    // CodeAssembler::GenerateCode().
2.  Code BuildWithCodeStubAssemblerCS(Isolate* isolate, int32_t builtin_index,
3.                                    CodeAssemblerGenerator generator,
4.                                    CallDescriptors::Key interface_descriptor,
5.                                    const char* name) {
6.    HandleScope scope(isolate);
7.    // Canonicalize handles, so that we can share constant pool entries pointing
8.    // to code targets without dereferencing their handles.
9.    CanonicalHandleScope canonical(isolate);
      // Zone handed to the assembler state below.
10.   Zone zone(isolate->allocator(), ZONE_NAME);
11.    // The interface descriptor with given key must be initialized at this point
12.    // and this construction just queries the details from the descriptors table.
13.    CallInterfaceDescriptor descriptor(interface_descriptor);
14.    // Ensure descriptor is already initialized.
15.    DCHECK_LE(0, descriptor.GetRegisterParameterCount());
      // The state is created with kind Code::BUILTIN and no poisoning.
16.    compiler::CodeAssemblerState state(
17.        isolate, &zone, descriptor, Code::BUILTIN, name,
18.        PoisoningMitigationLevel::kDontPoison, builtin_index);
      // Run the builtin-specific generator against the freshly created state.
19.    generator(&state);
      // Turn the populated state into a Code handle.
20.    Handle<Code> code = compiler::CodeAssembler::GenerateCode(
21.        &state, BuiltinAssemblerOptions(isolate, builtin_index));
22.    return *code;
23.  }
Enter fullscreen mode Exit fullscreen mode

Line 19 of the above code calls the generator function. Let’s take the TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler) as an example to explain:

1.  TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler) {
2.    Label generational_wb(this);
3.    Label incremental_wb(this);
4.    Label exit(this);
5.    Node* remembered_set = Parameter(Descriptor::kRememberedSet);
6.    Branch(ShouldEmitRememberSet(remembered_set), &generational_wb,
7.           &incremental_wb);
8.    BIND(&generational_wb);
9.    {
10.     Label test_old_to_young_flags(this);
11.      Label store_buffer_exit(this), store_buffer_incremental_wb(this);
12.      TNode<IntPtrT> slot = UncheckedCast<IntPtrT>(Parameter(Descriptor::kSlot));
13.      Branch(IsMarking(), &test_old_to_young_flags, &store_buffer_exit);
14.      BIND(&test_old_to_young_flags);
15.      {
16.        TNode<IntPtrT> value =
17.            BitcastTaggedToWord(Load(MachineType::TaggedPointer(), slot));
18.        TNode<BoolT> value_is_young =
19.            IsPageFlagSet(value, MemoryChunk::kIsInYoungGenerationMask);
20.        GotoIfNot(value_is_young, &incremental_wb);
21.        TNode<IntPtrT> object =
22.            BitcastTaggedToWord(Parameter(Descriptor::kObject));
23.        TNode<BoolT> object_is_young =
24.            IsPageFlagSet(object, MemoryChunk::kIsInYoungGenerationMask);
25.        Branch(object_is_young, &incremental_wb, &store_buffer_incremental_wb);
26.      }
27.      BIND(&store_buffer_exit);
28.      {
29.        TNode<ExternalReference> isolate_constant =
30.            ExternalConstant(ExternalReference::isolate_address(isolate()));
31.        Node* fp_mode = Parameter(Descriptor::kFPMode);
32.        InsertToStoreBufferAndGoto(isolate_constant, slot, fp_mode, &exit);
33.      }
34.      BIND(&store_buffer_incremental_wb);
35.      {
36.        TNode<ExternalReference> isolate_constant =
37.            ExternalConstant(ExternalReference::isolate_address(isolate()));
38.        Node* fp_mode = Parameter(Descriptor::kFPMode);
39.        InsertToStoreBufferAndGoto(isolate_constant, slot, fp_mode,
40.                                   &incremental_wb);
41.      }
42.    } //........................omit......................................
43.    BIND(&exit);
44.    IncrementCounter(isolate()->counters()->write_barriers(), 1);
45.    Return(TrueConstant());
46.  }
Enter fullscreen mode Exit fullscreen mode

TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler) generates the source of RecordWrite. TF_BUILTIN is a macro template. After expansion, you can see that it has a member, CodeAssemblerState* state, which holds the generated code. The generated RecordWrite code is then saved in the Code structure shown below.

class Code : public HeapObject {
 public:
  NEVER_READ_ONLY_SPACE
  // Opaque data type for encapsulating code flags like kind, inline
  // cache state, and arguments count.
  using Flags = uint32_t;

#define CODE_KIND_LIST(V)   \
  V(OPTIMIZED_FUNCTION)     \
  V(BYTECODE_HANDLER)       \
  V(STUB)                   \
  V(BUILTIN)                \
  V(REGEXP)                 \
  V(WASM_FUNCTION)          \
  V(WASM_TO_CAPI_FUNCTION)  \
  V(WASM_TO_JS_FUNCTION)    \
  V(JS_TO_WASM_FUNCTION)    \
  V(JS_TO_JS_FUNCTION)      \
  V(WASM_INTERPRETER_ENTRY) \
  V(C_WASM_ENTRY)

  enum Kind {
#define DEFINE_CODE_KIND_ENUM(name) name,
    CODE_KIND_LIST(DEFINE_CODE_KIND_ENUM)
#undef DEFINE_CODE_KIND_ENUM
        NUMBER_OF_KINDS
  };
//..................omit
Enter fullscreen mode Exit fullscreen mode

We can see that BUILTIN is one of the kinds listed in the Code class. By the way, Code is a heap object, which is managed by the V8 heap. We will explain V8 heap objects in a future chapter. Figure 1 shows the call stack.

Image description
In SetupBuiltinsInternal(), we see that AddBuiltin() adds code to i::isolate, as below.

// Registers `code` under position `index` by forwarding to
// builtins->set_builtin().
void SetupIsolateDelegate::AddBuiltin(Builtins* builtins, int index,
                                      Code code) {
  // The index recorded on the code object must agree with the slot it is
  // being registered under.
  DCHECK_EQ(index, code.builtin_index());
  builtins->set_builtin(index, code);
}
//..............separation......................
// Delegates to set_builtin() on the heap reached through isolate_, passing
// `index` and `builtin` through unchanged.
void Builtins::set_builtin(int index, Code builtin) {
  isolate_->heap()->set_builtin(index, builtin);
}
Enter fullscreen mode Exit fullscreen mode

The generated builtins will be inserted into the builtins_[Builtins::builtin_count] array, and finally the array will be mounted to i::isolate.

2. Builtin

From a functional point of view, builtins cover many core functions, such as Ignition, bytecode handlers, and ECMAScript specification functions. There are detailed comments in BUILTIN_LIST. There are seven types of builtin.

#define BUILD_CPP(Name)
#define BUILD_TFJ(Name, Argc, ...) 
#define BUILD_TFC(Name, InterfaceDescriptor)  
#define BUILD_TFS(Name, ...) 
#define BUILD_TFH(Name, InterfaceDescriptor) 
#define BUILD_BCH(Name, OperandScale, Bytecode)  
#define BUILD_ASM(Name, InterfaceDescriptor)
Enter fullscreen mode Exit fullscreen mode

Let’s talk about BuildAdaptor.

// Builds the Code object for an adaptor builtin: assembles the adaptor for
// `builtin_address` with a MacroAssembler and wraps the result in a Code of
// kind Code::BUILTIN tagged with `builtin_index`.
// Note: `name` is not referenced in the body shown here.
1.  Code BuildAdaptor(Isolate* isolate, int32_t builtin_index,
2.                    Address builtin_address, const char* name) {
3.    HandleScope scope(isolate);
4.    // Canonicalize handles, so that we can share constant pool entries pointing
5.    // to code targets without dereferencing their handles.
6.    CanonicalHandleScope canonical(isolate);
      // Local buffer that is handed to the assembler as its external buffer.
7.    constexpr int kBufferSize = 32 * KB;
8.    byte buffer[kBufferSize];
9.    MacroAssembler masm(isolate, BuiltinAssemblerOptions(isolate, builtin_index),
10.                       CodeObjectRequired::kYes,
11.                        ExternalAssemblerBuffer(buffer, kBufferSize));
12.    masm.set_builtin_index(builtin_index);
      // The assembler is expected to have no frame before the adaptor is emitted.
13.    DCHECK(!masm.has_frame());
      // Emit the adaptor code for the target address.
14.    Builtins::Generate_Adaptor(&masm, builtin_address);
      // Fetch the assembled code's description, then build the Code from it.
15.    CodeDesc desc;
16.    masm.GetCode(isolate, &desc);
17.    Handle<Code> code = Factory::CodeBuilder(isolate, desc, Code::BUILTIN)
18.                            .set_self_reference(masm.CodeObject())
19.                            .set_builtin_index(builtin_index)
20.                            .Build();
21.    return *code;
22.  }
Enter fullscreen mode Exit fullscreen mode

Line 14 generates the adaptor code by calling the following function:

// Emits the adaptor for a builtin located at `address`: an external reference
// created from the address is loaded into kJavaScriptCallExtraArg1Register,
// followed by a jump (reloc mode CODE_TARGET) to the
// AdaptorWithBuiltinExitFrame builtin.
// NOTE(review): `__` is presumably the usual shorthand macro for `masm->`;
// its definition is not shown here.
void Builtins::Generate_Adaptor(MacroAssembler* masm, Address address) {
  __ LoadAddress(kJavaScriptCallExtraArg1Register,
                 ExternalReference::Create(address));
  __ Jump(BUILTIN_CODE(masm->isolate(), AdaptorWithBuiltinExitFrame),
          RelocInfo::CODE_TARGET);
}
}
Enter fullscreen mode Exit fullscreen mode

Figure 2 shows the generated source code in char.
Image description
Summary: The generation of each builtin is different, but the method of analyzing and debugging the code is the same.


Okay, that wraps it up for this share. I’ll see you guys next time, take care!

Please reach out to me if you have any issues. WeChat: qq9123013 Email: v8blink@outlook.com

Top comments (0)