DEV Community

Cover image for Parsing CSV in Haskell
Riccardo Odone
Riccardo Odone

Posted on • Edited on • Originally published at odone.io

Parsing CSV in Haskell

You can keep reading here or jump to my blog to get the full experience, including the wonderful pink, blue and white palette.


Parsing CSV without header:

{-# LANGUAGE ScopedTypeVariables #-}

module Main where

import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as BL
import Data.Csv

-- file.csv
-- 1,2
-- 3,4

-- | Decode @file.csv@ (no header row) as pairs of 'Int's and print each pair.
--
-- If decoding fails, the error message reported by 'decode' is printed
-- instead of any rows.
main :: IO ()
main = do
    contents <- BL.readFile "file.csv"
    case decode NoHeader contents of
        -- The error is already a String: 'putStrLn' emits it verbatim, whereas
        -- 'print' would wrap it in quotes and escape special characters.
        Left err   -> putStrLn err
        -- The pattern annotations fix the row type that 'decode' must produce.
        Right rows -> V.forM_ rows $ \(x :: Int, y :: Int) -> print (x, y)
    -- Prints, for the sample file above:
    -- (1,2)
    -- (3,4)
Enter fullscreen mode Exit fullscreen mode

Parsing CSV into a custom data type requires an instance of FromRecord for that type:

{-# LANGUAGE ScopedTypeVariables #-}

module Main where

import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as BL
import Data.Csv
import Control.Monad (mzero)

-- | A pair of integer coordinates, one per CSV column (first column, then
-- second column).
data Coords = Coords Int Int

-- | Positional decoding: a record must contain exactly two fields, which are
-- parsed in order as the two 'Int's of 'Coords'. A record with any other
-- number of fields is rejected with 'mzero'.
instance FromRecord Coords where
    parseRecord record =
        case V.toList record of
            [rawX, rawY] -> Coords <$> parseField rawX <*> parseField rawY
            _            -> mzero

-- file.csv
-- 1,2
-- 3,4

-- | Decode @file.csv@ (no header row) into 'Coords' values and print each one
-- as a tuple.
--
-- If decoding fails, the error message reported by 'decode' is printed
-- instead of any rows.
main :: IO ()
main = do
    contents <- BL.readFile "file.csv"
    case decode NoHeader contents of
        -- The error is already a String: 'putStrLn' emits it verbatim, whereas
        -- 'print' would wrap it in quotes and escape special characters.
        Left err   -> putStrLn err
        -- Matching on the 'Coords' constructor fixes the row type to decode.
        Right rows -> V.forM_ rows $ \(Coords x y) -> print (x, y)
    -- Prints, for the sample file above:
    -- (1,2)
    -- (3,4)
Enter fullscreen mode Exit fullscreen mode

Or the same thing using generics:

{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveGeneric #-}

module Main where

import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as BL
import Data.Csv
import GHC.Generics (Generic)

-- | A pair of integer coordinates, one per CSV column. 'Generic' is derived
-- so that the instances declared for this type need no hand-written methods.
data Coords = Coords Int Int
    deriving (Generic)

-- No method bodies are given, so the class's default implementation is used;
-- the article presents this as the Generic-based replacement for a
-- hand-written 'FromRecord' instance.
instance FromRecord Coords
-- NOTE(review): 'ToRecord' is presumably only needed for encoding; 'main' in
-- this snippet only decodes, so this instance looks unused here.
instance ToRecord Coords

-- file.csv
-- 1,2
-- 3,4

-- | Decode @file.csv@ (no header row) into 'Coords' values and print each one
-- as a tuple.
--
-- If decoding fails, the error message reported by 'decode' is printed
-- instead of any rows.
main :: IO ()
main = do
    contents <- BL.readFile "file.csv"
    case decode NoHeader contents of
        -- The error is already a String: 'putStrLn' emits it verbatim, whereas
        -- 'print' would wrap it in quotes and escape special characters.
        Left err   -> putStrLn err
        -- Matching on the 'Coords' constructor fixes the row type to decode.
        Right rows -> V.forM_ rows $ \(Coords x y) -> print (x, y)
    -- Prints, for the sample file above:
    -- (1,2)
    -- (3,4)
Enter fullscreen mode Exit fullscreen mode

Parsing CSV by referring to the fields explicitly by name. This makes the code more robust to changes in the CSV (e.g. a change in the order of the columns):

{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}

module Main where

import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as BL
import Data.Csv

-- | A pair of integer coordinates; the first field holds the @x@ column and
-- the second the @y@ column of the CSV file.
data Coords = Coords Int Int

-- | Name-based decoding: the @x@ and @y@ columns are looked up by header
-- name rather than by position.
instance FromNamedRecord Coords where
    parseNamedRecord record = do
        xValue <- record .: "x"
        yValue <- record .: "y"
        pure (Coords xValue yValue)

-- file.csv
-- x,y
-- 1,2
-- 3,4

-- | Decode @file.csv@ (with a header row) into 'Coords' values, looking the
-- columns up by name, and print each value as a tuple.
--
-- If decoding fails, the error message reported by 'decodeByName' is printed
-- instead of any rows.
main :: IO ()
main = do
    contents <- BL.readFile "file.csv"
    case decodeByName contents of
        -- The error is already a String: 'putStrLn' emits it verbatim, whereas
        -- 'print' would wrap it in quotes and escape special characters.
        Left err        -> putStrLn err
        -- The first component of the result is not needed here and is ignored.
        Right (_, rows) -> V.forM_ rows $ \(Coords x y) -> print (x, y)
    -- Prints, for the sample file above:
    -- (1,2)
    -- (3,4)
Enter fullscreen mode Exit fullscreen mode

Or the same thing using generics:

{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveGeneric #-}

module Main where

import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as BL
import Data.Csv
import GHC.Generics (Generic)

-- | A pair of integer coordinates with named fields. The field names match
-- the @x,y@ header of the sample CSV file; 'Generic' is derived so that the
-- instances declared for this type need no hand-written methods.
data Coords = Coords
    { x :: Int
    , y :: Int
    }
    deriving (Generic)

-- No method bodies are given, so the class's default implementation is used;
-- the article presents this as the Generic-based replacement for a
-- hand-written 'FromNamedRecord' instance. The record fields @x@ and @y@ are
-- expected to line up with the @x,y@ header of the sample file.
instance FromNamedRecord Coords
-- NOTE(review): 'ToNamedRecord' and 'DefaultOrdered' are presumably only
-- needed for encoding; 'main' in this snippet only decodes, so these two
-- instances look unused here.
instance ToNamedRecord Coords
instance DefaultOrdered Coords

-- file.csv
-- x,y
-- 1,2
-- 3,4

-- | Decode @file.csv@ (with a header row) into 'Coords' values, looking the
-- columns up by name, and print each value as a tuple.
--
-- If decoding fails, the error message reported by 'decodeByName' is printed
-- instead of any rows.
main :: IO ()
main = do
    contents <- BL.readFile "file.csv"
    case decodeByName contents of
        -- The error is already a String: 'putStrLn' emits it verbatim, whereas
        -- 'print' would wrap it in quotes and escape special characters.
        Left err        -> putStrLn err
        -- The first component of the result is not needed here and is ignored.
        -- The binders are named cx/cy so they do not shadow the record field
        -- selectors 'x' and 'y' of 'Coords'.
        Right (_, rows) -> V.forM_ rows $ \(Coords cx cy) -> print (cx, cy)
    -- Prints, for the sample file above:
    -- (1,2)
    -- (3,4)
Enter fullscreen mode Exit fullscreen mode

Get the latest content via email from me personally. Reply with your thoughts. Let's learn from each other. Subscribe to my PinkLetter!

Top comments (0)