DEV Community

criss.dev
criss.dev

Posted on • Updated on • Originally published at cserb.net

Build your first blockchain from scratch (#3 Finding the longest chain)

In the previous articles (Probabilistic Finality, DAG) we looked into how Probabilistic Finality works and how DAGs can help to find the longest chain.

Today we are going have a look at implementing a basic DAG in Crystal. We will focus on implementing a DAG::Vertex class and finding the vertex with the longest distance from a given starting vertex.
Each vertex will have a unique name and a Hash to represent its edges pointing to its children.

Here is the complete finished implementation:

module DAG
  record Tip,
    vertex : DAG::Vertex,
    distance : Int32,
    branch_root : (DAG::Vertex | Nil)

  class Vertex
    alias Name = String

    getter name : Name
    getter edges : Hash(Name, Vertex)

    def initialize(@name, @edges = {} of Name => Vertex)
    end

    def add(edge_to vertex : Vertex) : Nil
      @edges[vertex.name] = vertex
    end

    def children : Array(Vertex)
      @edges.values
    end
  end

  extend self

    def tips(
    from vertex : Vertex,
    visited = Hash(Vertex::Name, Bool).new(false),
    distance = Hash(Vertex, Int32).new(0),
    tips = Array(DAG::Tip).new,
    start : (Vertex | Nil) = nil,
    current_branch_root : (Vertex | Nil) = nil
  ) : Array(DAG::Tip)
    # remember the starting vertex
    start = vertex if distance.empty?

    # mark current vertex as visited
    visited[vertex.name] = true

    # sort children just to make it easier to test
    sorted_children = vertex.children.sort_by { |c| c.name }
    # if it has no children it's the tip of a branch
    if !sorted_children[0]?
      tips << DAG::Tip.new(
        vertex: vertex,
        distance: distance[vertex],
        branch_root: current_branch_root
      )
    else
      # it has children so let's continue the traverse
      sorted_children.each do |child|
        if !visited[child.name]
          # we mark the child of the starting vertex as the current root
          # of the branch we are exploring
          if start == vertex
            current_branch_root = child
          end
          # we calculate the distance of the child
          distance[child] = distance[vertex] + 1

          tips(
            from: child,
            visited: visited,
            distance: distance,
            tips: tips,
            start: start,
            current_branch_root: current_branch_root
          )
        end
      end
    end

    tips
  end

  def tip_of_longest_branch(from vertex : DAG::Vertex) : DAG::Tip
    tips = self.tips(from: vertex)

    tip_of_longest_branch from: tips
  end

  def tip_of_longest_branch(from tips : Array(DAG::Tip)) : DAG::Tip
    # Tip of longest branch (chain)
    tips.max_by { |t| t.distance }
  end
end
Enter fullscreen mode Exit fullscreen mode

Let's break the code apart to understand how it works.

1. DAG::Vertex Class

module DAG
  ...

  class Vertex
    alias Name = String

    getter name : Name
    getter edges : Hash(Name, Vertex)

    def initialize(@name, @edges = {} of Name => Vertex)
    end

    def add(edge_to vertex : Vertex) : Void
      @edges[vertex.name] = vertex
    end

    def children : Array(Vertex)
      @edges.values
    end
  end

  ...

end
Enter fullscreen mode Exit fullscreen mode

This should be straight forward. We build a class Vertex and pass along the Name when we initialize it. Afterwards we create its edges:

v1 = DAG::Vertex.new("A")
v2 = DAG::Vertex.new("B")

v1.add edge_to: v2

pp v1.name #=> "A"
pp v1.children #=> [#<DAG::Vertex:0x7f0f4866ee60 @edges={}, @name="B">]
# same as
pp v1.edges.values #=> [#<DAG::Vertex:0x7f0f4866ee60 @edges={}, @name="B">]
Enter fullscreen mode Exit fullscreen mode

You can play with the code here

2. Distances and the Tips of branches

module DAG

  ...

  extend self

  def tips(
    from vertex : Vertex,
    visited = Hash(Vertex::Name, Bool).new(false),
    distance = Hash(Vertex, Int32).new(0),
    tips = Array(DAG::Tip).new,
    start : (Vertex | Nil) = nil,
    current_branch_root : (Vertex | Nil) = nil
  ) : Array(DAG::Tip)
    # remember the starting vertex
    start = vertex if distance.empty?

    # mark current vertex as visited
    visited[vertex.name] = true

    # sort children just to make it easier to test
    sorted_children = vertex.children.sort_by { |c| c.name }
    # if it has no children it's the tip of a branch
    if !sorted_children[0]?
      tips << DAG::Tip.new(
        vertex: vertex,
        distance: distance[vertex],
        branch_root: current_branch_root
      )
    else
      # it has children so let's continue the traverse
      sorted_children.each do |child|
        if !visited[child.name]
          # we mark the child of the starting vertex as the current root
          # of the branch we are exploring
          if start == vertex
            current_branch_root = child
          end
          # we calculate the distance of the child
          distance[child] = distance[vertex] + 1

          tips(
            from: child,
            visited: visited,
            distance: distance,
            tips: tips,
            start: start,
            current_branch_root: current_branch_root
          )
        end
      end
    end

    tips
  end

Enter fullscreen mode Exit fullscreen mode

To find out the distances starting from say block B1 to Bn we use depth first search (DFS), that explores each branch to the end and than moves on to the next one. If you think you need to better understand DFS you should watch this video.

  def tips(
    from vertex : Vertex,
    visited = Hash(Vertex::Name, Bool).new(false),
    distance = Hash(Vertex, Int32).new(0),
    tips = Array(DAG::Tip).new,
    start : (Vertex | Nil) = nil,
    current_branch_root : (Vertex | Nil) = nil
  ) : Array(DAG::Tip)

  ...
end
Enter fullscreen mode Exit fullscreen mode

The tips method is a recursive method that initially only needs the from vertex parameter and all other parameters are passed along with each step.

visited is an empty hash, which has been initialized with a default value of false for each key that is unknown.

distance contains a Hash of DAG::Vertex with its value being the distance from the first block passed as from vertex parameter

tips contains an Array of DAG::Tip which is a Struct defined using the record macro

module DAG
  record Tip,
    vertex : DAG::Vertex,
    distance : Int32,
    branch_root : (DAG::Vertex | Nil)

  ...
end
Enter fullscreen mode Exit fullscreen mode

We will examine the missing start and current_branch_root parameters later. First let's refresh our memory with an example of a DAG:

Alt Text

If we pass block 3000.1 as from vertex we should receive an Array of [ 3001.2, 3004.2, 3005.1 ] as the tips of this DAG.

Let's step into the method now:

    ...

    # remember the starting vertex
    start = vertex if distance.empty?

    # mark current vertex as visited
    visited[vertex.name] = true

    # sort children just to make it easier to test
    sorted_children = vertex.children.sort_by { |c| c.name }

    ...
Enter fullscreen mode Exit fullscreen mode

start is set to the first vertex that we actually passed to the method and it's not altered afterwards as distance will never be empty once we move on to the next step of the traverse.

visited is marking the vertex as visited and this is repeated each step

sorted_children is only set for convenience

    # if it has no children it's the tip of a branch
    if sorted_children.empty?
      tips << DAG::Tip.new(
        vertex: vertex,
        distance: distance[vertex],
        branch_root: current_branch_root
      )
    else
      # it has children so let's continue the traverse
      sorted_children.each do |child|
        if !visited[child.name]
          # we mark the child of the starting vertex as the current root
          # of the branch we are exploring
          if start == vertex
            current_branch_root = child
          end
          # we calculate the distance of the child
          distance[child] = distance[vertex] + 1

          tips(
            from: child,
            visited: visited,
            distance: distance,
            tips: tips,
            start: start,
            current_branch_root: current_branch_root
          )
        end
      end
    end
Enter fullscreen mode Exit fullscreen mode

sorted_children.empty? if this returns true it means we are at the end of a branch. So it's a DAG::Tip

if !visited[child.name] otherwise we continue traversing unvisited children.

if start == vertex
  current_branch_root = child
end
Enter fullscreen mode Exit fullscreen mode

Remember the DAG example above? You have to imagine that once the traverse reaches a tip it steps back until the the branch splits off again and starts traversing unvisited vertices. If we start from 3000.1 all possible current_branch_roots are 3001.1 and 3001.2. We will need this later on to determine which is the branch root of the longest chain.

distance[child] = distance[vertex] + 1 now we calculate the distance and in the next step call tips again to make another step.

  def tip_of_longest_branch(from vertex : DAG::Vertex) : DAG::Tip
    tips = self.tips(from: vertex)

    tip_of_longest_branch from: tips
  end

  def tip_of_longest_branch(from tips : Array(DAG::Tip)) : DAG::Tip
    # Tip of longest branch (chain)
    tips.max_by { |t| t.distance }
  end
Enter fullscreen mode Exit fullscreen mode

tips.max_by { |t| t.distance } to find the tip of the longest branch and its branch root we filter for the one with the highest distance value.
If there are multiple vertices with the same distance we don't really care which one we pick. Why this is not important will become apparent later in the series.

Now let's give it a try:

v1 = DAG::Vertex.new("A")
v2 = DAG::Vertex.new("B")
v3 = DAG::Vertex.new("C")
v4 = DAG::Vertex.new("D")


v1.add edge_to: v2
v2.add edge_to: v3
v3.add edge_to: v4

pp DAG.tips from: v1
#[DAG::Tip(
#  @branch_root=
#   #<DAG::Vertex:0x7fce8d002e60
#    @edges=
#     {"C" =>
#       #<DAG::Vertex:0x7fce8d002e40
#        @edges={"D" => #<DAG::Vertex:0x7fce8d002e20 @edges={}, #@name="D">},
#        @name="C">},
#    @name="B">,
#  @distance=3,
#  @vertex=#<DAG::Vertex:0x7fce8d002e20 @edges={}, @name="D">)]


pp DAG.tip_of_longest_branch from: v1
# @vertex=#<DAG::Vertex:0x7f3acc219e20 @edges={}, @name="D">)

Enter fullscreen mode Exit fullscreen mode

You can try it out yourself here and create more complex structures resembling real life scenarios.

3. Conclusion
We found an easy way to create a DAG and using a simple depth first search we were able to find the tip of the longest chain.

Integrating the code into a working blockchain will require some more work, but we'll get to that later on.

4. Cocol Project
The Cocol Project is an effort to implement a basic, but fully working, blockchain in Crystal — a "Minimum Viable Blockchain"

The code we talked about in this article is part of the ProbFin shard. Here is the DAG implementation and the tests

Discussion (0)