Discussion on: Code Challenge: Follow the Dirty Money

View post

My solution using goroutines for speed :)

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
)

// r matches a dollar amount written as "$", one or more digits, a comma or
// period separator, and up to two further digits (e.g. "$12.50" or "$3,4").
// Amounts without a separator, such as "$100", do not match.
//
// MustCompile is used so that an invalid pattern fails loudly at start-up
// instead of leaving r nil.
var r = regexp.MustCompile(`\$[0-9]+(\,|\.)[0-9]{0,2}`)

// transaction is the JSON document served at each crawled URL.
type transaction struct {
    // Links lists the URLs of further transaction documents to crawl.
    Links []string `json:"links"`
    // Content is the free-text description that is scanned for dollar amounts.
    Content string `json:"content"`
}

// crawl fetches the transaction document at startingURL, adds up every dollar
// amount found in its content and reports the result to the caller.
//
// Exactly one value is sent on sum (the document's total, 0 when no amount is
// found), followed by exactly one value on URLsChan (the document's links).
// Sending a single total per document lets the caller pair one receive on each
// channel with every crawl it starts; sending one value per matched amount
// would leave this goroutine blocked whenever a document holds several amounts.
//
// Any fetch, decode or parse failure is logged and terminates the process with
// exit status 1.
func crawl(sum chan<- float64, URLsChan chan<- []string, startingURL string) {
    trans, err := fetchTransaction(startingURL)
    if err != nil {
        log.Print(err)
        os.Exit(1)
    }

    total, err := sumAmounts(trans.Content)
    if err != nil {
        log.Printf("%s: %v", startingURL, err)
        os.Exit(1)
    }

    sum <- total
    URLsChan <- trans.Links
}

// fetchTransaction downloads url and decodes its body as a transaction.
func fetchTransaction(url string) (transaction, error) {
    response, err := http.Get(url)
    if err != nil {
        return transaction{}, fmt.Errorf("fetching transaction: %v", err)
    }
    defer response.Body.Close()

    // An error page is not a transaction; do not try to decode it.
    if response.StatusCode != http.StatusOK {
        return transaction{}, fmt.Errorf("fetching %s: unexpected status %s", url, response.Status)
    }

    body, err := ioutil.ReadAll(response.Body)
    if err != nil {
        return transaction{}, fmt.Errorf("reading %s: %v", url, err)
    }

    var trans transaction
    if err := json.Unmarshal(body, &trans); err != nil {
        return transaction{}, fmt.Errorf("decoding %s: %v", url, err)
    }
    return trans, nil
}

// sumAmounts returns the sum of all dollar amounts that r finds in content.
func sumAmounts(content string) (float64, error) {
    var total float64
    for _, match := range r.FindAllString(content, -1) {
        // Drop the leading "$" and turn a decimal comma into a decimal point
        // so that strconv can parse the number.
        amount := strings.Replace(match[1:], ",", ".", -1)
        val, err := strconv.ParseFloat(amount, 64)
        if err != nil {
            return 0, fmt.Errorf("parsing amount %q: %v", match, err)
        }
        total += val
    }
    return total, nil
}

// main crawls the transaction documents layer by layer, starting from the seed
// URL, and prints the sum of all amounts reported by crawl.
//
// NOTE(review): URLs are not de-duplicated, so a document linked more than
// once is fetched and counted each time, and a cycle of links would never
// terminate — confirm whether the data set can contain either.
func main() {
    sum := make(chan float64)
    urlsChan := make(chan []string)
    urlsList := []string{"https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"}
    var totalAmt float64

    for len(urlsList) > 0 {
        for _, url := range urlsList {
            go crawl(sum, urlsChan, url)
        }

        // Drain both channels until every crawl of this layer has delivered
        // its links. A crawl sends its amounts before its links, so once all
        // link lists have arrived no amount can still be in flight. Receiving
        // with select (rather than one amount then one link list in lock-step)
        // keeps this loop from deadlocking when a crawl sends several amounts.
        var newUrlsList []string
        for pending := len(urlsList); pending > 0; {
            select {
            case amt := <-sum:
                totalAmt += amt
            case links := <-urlsChan:
                newUrlsList = append(newUrlsList, links...)
                pending--
            }
        }
        urlsList = newUrlsList
    }
    fmt.Println("Total amount is", totalAmt)
}

P.S. Sorry for the ugly code :D It was written in a hurry.

jorin • Nov 3 '17

Nice! I wrote a similar version but using a sync.WaitGroup and a separate constant number of workers to parallelize the download. You can find it here.

Stanley Nguyen • Nov 3 '17

One possible way to optimize further is to let different "layers" of JSON object URLs run concurrently. However, I haven't come up with an actual implementation yet (as you can see, my implementation currently crawls the gist URLs only one "layer" at a time).