Discussion on: Verify matching parens, brackets and braces with Elixir

View post

Replies for: Sure, there are many ways to solve the same coding problem, but each language has its own nitty-gritty details where you/we/people can distinguish ...

Very nice piece of code! I'm still fairly new to Elixir and Functional Programming, but I appreciate your response a lot. Using the pattern match to extract the parentesis is very cool, I didn't know we could pattern match with <<x::binary-size(1)>>, but the syntax is very neat. There is so much to learn.
I didn't try to refactor this code using Tail Call Optmization, but it is very elegant. Thanks for sharing so much (:

The only problem I can see is not verifying if the parens we get are matching, we are only verifying if we get an even number of parens. I believe that passing something like (a + [ b - c ) ] will return true.

Alim Nastaev • Jul 15 '21 • Edited

Yeah, I see.
Spent a little bit time on it again. The code is a little bit ugly since I left a lot of comments and in some places unnecessary guards for the sake of readability.

BTW, you can run tests this way - elixir match_parens.exs --test

defmodule Parens do
  @moduledoc """

  the idea here is:
  assuming we parsed what we need to this - "[{()}]",
  where openers = "[{(" and closers = ")}]"
  if we reverse openers, first elemnt of it
  will be a matching pair of the first element in closers.

  We do all that logic on the way to make our implementation fast.

  Pay attention to the acc or secong argument (a list) of `run_check/2` function where
  we collect openers in reversed order and result

  There is a way to make it faster. Any thoughts?
  """
  @openers ["{", "[", "("]
  @closers ["}", "]", ")"]
  @valid_matches ["{}", "[]", "()"]

  def is_matching?(input) when is_binary(input), do: run_check(input, ["", nil])
  def is_matching?(input), do: "#{inspect(input)} is not a valid string."
  def is_matching?(), do: "input should be a valid string"

  ### --- BASE CASES --- ###
  # even matching pairs
  defp run_check("" = _input, ["" = _openers, result] = _acc), do: result
  # odd matching pairs (don't need guard here, but left it for the sake of readability)
  defp run_check("", [openers, _]) when openers != "", do: false
  #########################

  # using acc (openers) to collect openers in reverse order,
  # so we can pattern match on it later
  defp run_check(<<head::binary-size(1)>> <> rest, [openers, result])
       when head in @openers,
       do: run_check(rest, [head <> openers, result])

  # covering case where there is only closers
  defp run_check(<<head::binary-size(1)>> <> rest, ["" = openers, _result])
       when head in @closers,
       do: run_check(rest, [openers, false])

  # openers collected and now we compare first element in it with closers
  defp run_check(<<closer::binary-size(1)>> <> rest_main, [
         <<opener::binary-size(1)>> <> rest_openers,
         _result
       ])
       when closer in @closers and (opener <> closer) in @valid_matches,
       do: run_check(rest_main, [rest_openers, true])

  # even number of openers and closers, but they are not matching
  defp run_check(<<closer::binary-size(1)>> <> rest_main, [
         <<_opener::binary-size(1)>> <> rest_openers,
         _result
       ])
       when closer in @closers,
       do: run_check(rest_main, [rest_openers, false])

  # if not openers or/and closers
  defp run_check(<<_::binary-size(1)>> <> rest, acc), do: run_check(rest, acc)
end

case System.argv() do
  ["--test"] ->
    ExUnit.start()

    defmodule ParensTest do
      use ExUnit.Case

      test "tests according to the test data from assessment" do
        assert Parens.is_matching?("1 * 2 (3 + [4 / 5])") == true
        assert Parens.is_matching?("1 * 2 (3 + [4 / 5]") == false

        assert Parens.is_matching?("(a + [ b - c ] )") == true
        assert Parens.is_matching?("(a + [ b - c ) ]") == false
        assert Parens.is_matching?("(a + [ b - c ] ) ]") == false

        assert Parens.is_matching?("[4") == false

        assert Parens.is_matching?("5[(") == false
        assert Parens.is_matching?("5]") == false
        assert Parens.is_matching?("])}") == false
        assert Parens.is_matching?("[{(") == false
        assert Parens.is_matching?("[}[{(") == false

        assert Parens.is_matching?([3]) == "[3] is not a valid string."
        assert Parens.is_matching?(3) == "3 is not a valid string."
        assert Parens.is_matching?() == "input should be a valid string"
      end
    end

  _ ->
    IO.puts(:stderr, "\nplease specify --test flag")
end

Lucas Perez • Jul 15 '21 • Edited

Nice!
I tried to basically remake this post's implementation but using a string as a stack (and not a list), so I'm not using lists anywhere and just evaluating if the stack is empty at the end of everything.

Also, there is an early escape when parens don't match, so we don't have to read the whole input:

when head in @closers,
  do: (stack_head <> head in @valid_matches) && run_check(tail, stack_tail)
# or using a map
when head in @closers
  do: @pairs[stack_head] == head && run_check(tail, stack_tail)

This is what I came up with:

defmodule Parens do
  @openers ["{", "[", "("]
  @closers ["}", "]", ")"]
  @pairs %{"{" => "}", "[" => "]", "(" => ")"}

  def is_matching?(input) when is_binary(input), do: run_check(input, "")
  def is_matching?(input), do: "#{inspect(input)} is not a valid string."
  def is_matching?(), do: "input should be a valid string"

  defp run_check("", stack), do: stack == ""

  # When head is an opener, simply put in the stack
  defp run_check(<<head::binary-size(1)>> <> tail, stack)
    when head in @openers,
    do: run_check(tail, head <> stack)

  # Edge case to ensure we can correctly return false when the stack is empty
  # and the next character is a closer. Without this, the function would not
  # match the run_check when in @closers because the stack can't be empty
  defp run_check(<<head::binary-size(1)>> <> _tail, "") when head in @closers do
    false
  end

  # Empty stacks does not match here, so I had to create a specific match for an
  # empty stack and a closer
  defp run_check(
    <<head::binary-size(1)>> <> tail,
    <<stack_head::binary-size(1)>> <> stack_tail
  ) when head in @closers,
    do: @pairs[stack_head] == head && run_check(tail, stack_tail)

  # When head is not opener nor closer, simply ignore
  defp run_check(<<_head::binary-size(1)>> <> tail, stack) do
    run_check(tail, stack)
  end
end

I was testing the time execution by running this is IEx:
:timer.tc(fn -> Parens.is_matching?("(((((((((([[{[]}]]))))()([[[]]]))))))){({({({{{{({()})}{({})}}}})})})}[]") end)
I got to the conclusion that using a map to check if the parens are matching is faster that a list with the valid matches, but not by much.

# Apparently this
@pairs %{"{" => "}", "[" => "]", "(" => ")"}
@pairs[stack_head] == head

# is slightly faster than this
@valid_matches ["{}", "[]", "()"]
stack_head <> head in @valid_matches

Alim Nastaev • Jul 15 '21 • Edited

There is a way to avoid @valid_matches or @pairs (in your case) and make it even faster. Again, pattern matching is the Boss, right?!
So we can avoid a map and a list by this (in my case):

  defp run_check("]" <> rest_main, [
         "[" <> rest_openers,
         _result
       ]),
       do: run_check(rest_main, [rest_openers, true])

  defp run_check(")" <> rest_main, [
         "(" <> rest_openers,
         _result
       ]),
       do: run_check(rest_main, [rest_openers, true])

  defp run_check("}" <> rest_main, [
         "{" <> rest_openers,
         _result
       ]),
       do: run_check(rest_main, [rest_openers, true])

But again there is a way to make it even faster :)

Great job with cleaning up your solution!

Lucas Perez • Jul 16 '21

At first I didn't want to make so many repeated function definitions to match specifically "}", "]" or ")", I thought it wasn't that readable and too repetitive, but I changed my mind over a few months, and now looking at the perspective of speed, it looks pretty good.

Maybe a way to make it faster is to break the recursion everytime the accumulator was set to false and return the false itself? Or maybe the very first definition of run_check/2 could be

defp run_check(_input, [_openers, false]), do: false

I'm not sure how to make it faster, does it involve some change in strategy? 🤔

Many thanks! 😁

Alim Nastaev • Jul 16 '21 • Edited

If we have 2 inputs:

{({})} and 2. { ( { } ) }

In 1 we have to perform 6 iterations to walk through it and in 2 it's 11, right?

So is there a way we can walk through both inputs with the same amount of iterations? If yes - we can apply the same approach to our solution.

(Pattern matching is whispering here something for us again 🙂 )

Lucas Perez • Jul 16 '21

Well, if we could group the whitespaces together during the match, that would be great. The program would just have to check non-whitespace characters.
Something like this would work for only a single space

defp run_check("{ " <> rest, [stack, result]), do: run_check(rest, ["{" <> stack, result])

And then repeat for brackets and parenthesis.

It would be perfect to match an unspecified amount of whitecharacters at once. Using a regex it would be a breeze, but I'm sure that is not what we want here.

In fact, we would like to match everything that is not one of { [ ( ) ] }, with unspecified length, not just whitespaces. Then we could ignore everything that is irrelevant, essentially doing a filter inside the match.
After achieving a way to do that, parsing abc {} would require the same amount of iterations as {}.

But how do I do it?
Matches like "{" <> irrelevant <> "{" fail because the size of the variable irrelevant is unknown, and "{" <> <<irr::binary>> <> "{" fails because a binary field without a size is only allowed as the last member of the match (basically the same problem, Elixir doesn't know up to what point to check).

I could write like 200 headers of the function for one, two, three, ..., 200 character length for the irrelevant variable, but that would be tedious. I am 100% sure there is a way to make Elixir itself write some Elixir code for me, maybe use some range to dynamically create these functions, and leave a last header matching whatever didn't match before. It would speed up the parsing for most cases, and in the worst scenario it would be the same as it is now.

Alim Nastaev • Jul 16 '21

I cleaned up/refactored my solution. Check it out down below.

There is a section ### making a recursive step wider ### - that code explains my thoughts. I left some logs in that part, so if you run code from iex you will clearly see what I was trying to say and the output looks like this:

iex::8> ParensBracketsBraces.matching?("(a + [ b - c ] )")

ELEMENT <> WHITESPACE 

: [elem: "a", input_tail: "+ [ b - c ] )", stack: "("]

ELEMENT <> WHITESPACE 

: [elem: "+", input_tail: "[ b - c ] )", stack: "("]

WHITESPACE <> ELEMENT 

: [elem: "b", input_tail: " - c ] )", stack: "[("]

WHITESPACE <> ELEMENT 

: [elem: "-", input_tail: " c ] )", stack: "[("]

WHITESPACE <> ELEMENT 

: [elem: "c", input_tail: " ] )", stack: "[("]

BTW, hope you use iex - very powerful tool...

defmodule ParensBracketsBraces do
  @openers ["{", "[", "("]
  @closers ["}", "]", ")"]
  @all @openers ++ @closers

  def matching?(input) when is_binary(input), do: run_check("", input)
  def matching?(input), do: "#{inspect(input)} is not a valid string."
  def matching?(), do: "input should be a valid string"

  ### --- BASE CASES --- ###
  # matching pairs
  defp run_check("" = _stack, "" = _input), do: true
  # no matches
  defp run_check(_, "" = _input), do: false

  ### matches on opener only and putting it to the stack ###
  defp run_check(stack, <<opener::binary-size(1)>> <> input_tail)
       when opener in @openers,
       do: run_check(opener <> stack, input_tail)

  ### covering case where there are only closers ###
  defp run_check("", <<closer::binary-size(1)>> <> _tail_input)
       when closer in @closers,
       do: false

  ### comparing top item in the stack with closer ###

  defp run_check("[" <> stack_tail, "]" <> input_tail),
    do: run_check(stack_tail, input_tail)

  defp run_check("(" <> stack_tail, ")" <> input_tail),
    do: run_check(stack_tail, input_tail)

  defp run_check("{" <> stack_tail, "}" <> input_tail),
    do: run_check(stack_tail, input_tail)

  ###############################

  ### making a recursive step wider ###

  defp run_check(stack, <<elem::binary-size(1)>> <> " " <> input_tail)
       when elem not in @all do
    IO.inspect(binding(), label: "\nELEMENT <> WHITESPACE \n\n")
    run_check(stack, input_tail)
  end

  defp run_check(stack, " " <> <<elem::binary-size(1)>> <> input_tail)
       when elem not in @all do
    IO.inspect(binding(), label: "\nWHITESPACE <> ELEMENT \n\n")
    run_check(stack, input_tail)
  end

  ###############################

  ### if not openers or/and closers ###
  defp run_check(stack, <<_::binary-size(1)>> <> input_tail), do: run_check(stack, input_tail)
end