Haskell: From a parallel perspective

Brian McKenna

October 13, 2010

What's Haskell?

What's it used for?

Frag xmonad OmegaGB House Leksah Blobs Hircules

How's it look?

main = putStrLn "Hello world"

How's it look?

fac 0 = 1
fac n = n * fac (n-1)

main = print (fac 42)

Why parallel Haskell?

Explicit side effects

I really want to stress this point:

f :: Int -> Int
g :: Int -> IO Int

Sequence is just function application. The following are the same:

main = do
name <- getLine
putStrLn ("Hi " ++ name)

main = getLine >>=
(\name -> putStrLn ("Hi " ++ name))

Popular parallelism

Explicit

Implicit

Semi-implicit

Explicit concurrency (Java)

import java.lang.Runnable;

class ThreadTest implements Runnable {
public void run() {
System.out.println("Hello world");
}

public static void main(String[] args) {
Thread runner = new Thread(new ThreadTest());
runner.start();
try {
runner.join();
} catch(InterruptedException e) {
}
}
}

Explicit concurrency (Clojure)

(let [runner (Thread. #(println "Hello world"))]
(.start runner)
(.join runner))

Semi-implicit parallelism (C# - TPL)

using System;
using System.Threading.Tasks;

class Test
{
static void Main()
{
Parallel.For(0, 100, i =>
{
Console.WriteLine(i);
});
}
}

Implicit parallelism (NESL - nested data )

function qsort(a) =
if (#a < 2) then a
else
let pivot = a[#a/2];
lesser = {e in a| e < pivot};
equal = {e in a| e == pivot};
greater = {e in a| e > pivot};
result = {qsort(v): v in [lesser,greater]};
in result[0] ++ equal ++ result[1] $

Haskell is diverse

Semi-implicit (Parallel Haskell)

Implicit (Data Parallel Haskell)

Explicit concurrency (Concurrent Haskell)

Semi-implicit in Haskell

Quicksort (sequential)



qsort [] = []
qsort [x] = [x]
qsort (x:xs) = lt ++ (x:gt)
where
lt = qsort [y | y <- xs, y < x]
gt = qsort [y | y <- xs, y >= x]

main = do
print $ head sorted -- Will be 1
print $ last sorted -- Will be 10000
where nums = [1..5000] ++ reverse [5000..10000]
sorted = qsort nums

Quicksort (with par and pseq)

import Control.Parallel

qsort [] = []
qsort [x] = [x]
qsort (x:xs) = lt `par` gt `pseq` lt ++ (x:gt)
where
lt = qsort [y | y <- xs, y < x]
gt = qsort [y | y <- xs, y >= x]

main = do
print $ head sorted -- Will be 1
print $ last sorted -- Will be 10000
where nums = [1..5000] ++ reverse [5000..10000]
sorted = qsort nums

par and pseq

par a b

pseq a b

a `par` b `pseq` a + b

Algorithm + Strategy = Parallelism

Skeletons

Evaluation degrees

par, pseq and stategies

Why?

Careful!

Implicit in Haskell

Remember the NESL example?

function qsort(a) =
if (#a < 2) then a
else
let pivot = a[#a/2];
lesser = {e in a| e < pivot};
equal = {e in a| e == pivot};
greater = {e in a| e > pivot};
result = {qsort(v): v in [lesser,greater]};
in result[0] ++ equal ++ result[1] $

Quicksort (Data Parallel Haskell)

{-# LANGUAGE PArr #-}
{-# OPTIONS -fvectorise #-}
{-# OPTIONS -fno-spec-constr-count #-}
module QSort (qsort) where

import Data.Array.Parallel.Prelude
import Data.Array.Parallel.Prelude.Int

import qualified Prelude

qsort :: PArray Int -> PArray Int
{-# NOINLINE qsort #-}
qsort xs = toPArrayP (qsort' (fromPArrayP xs))

Quicksort (Data Parallel Haskell)

qsort' :: [:Int:] -> [:Int:]
qsort' a
| lengthP a < 2 = a
| otherwise =
let pivot = a !: (lengthP a `div` 2)
lesser = [:e | e <- a, e < pivot:]
equal = [:e | e <- a, e == pivot:]
greater = [:e | e <- a, e > pivot:]
result = mapP qsort' [:lesser, greater:]
in (result !: 0) +:+ equal +:+ (result !: 1)

Data Parallel Haskell

Intel Concurrent Collections (CnC)

myStep items tag =
do word1 <- get items "left"
word2 <- get items "right"
put items "result"
(word1 ++ word2 ++ show tag)

cncGraph =
do tags <- newTagCol
items <- newItemCol
prescribe tags (myStep items)
initialize$
do put items "left" "Hello "
put items "right" "World "
putt tags 99
finalize$
do get items "result"

Intel Concurrent Collections (CnC)

Explicit in Haskell

Concurrent Haskell

Creating a thread (forkIO)

import Control.Concurrent

main = do
putStrLn "Hello"
forkIO (putStrLn "world")
threadDelay 1000000 -- Let the thread finish

Mutable values (MVar)

import Control.Concurrent
import Control.Concurrent.MVar

child mvar = do
putStrLn "world"
threadDelay 1000000
putMVar mvar ()

main = do
putStrLn "Hello"
mvar <- newEmptyMVar
forkIO (child mvar)
takeMVar mvar
return ()

Channels (Chan)

import Control.Concurrent
import Control.Concurrent.Chan

main = do
chan <- newChan
forkIO (child chan)
threadDelay 1000000
printNumbers chan
return ()

Channels (Chan)

child chan = do
mapM_ (writeChan chan) [2,4..100]
return ()

printNumbers chan = do
empty <- isEmptyChan chan
if empty
then return ()
else do
n <- readChan chan
print n
threadDelay 1000000
printNumbers chan

Semaphores (QSemN)

import Control.Concurrent
import Control.Concurrent.QSemN

child sem n = do
threadDelay (1000000 * n)
putStrLn ("world " ++ show n)
signalQSemN sem 1

main = do
sem <- newQSemN 0
putStrLn "Hello"
mapM_ (\n -> forkIO (child sem n)) [1..4]
waitQSemN sem 4

Software Transactional Memory (STM)

STM variables (TVar)

import Control.Concurrent
import Control.Concurrent.STM

main = do
var <- atomically $ newTVar 0
mapM_ (\_ -> forkIO $ incValue var 10000) [1..4]
printWhileLess var 40000

STM variables (TVar)

incValue var times = do
atomically $ do
n <- readTVar var
writeTVar var (n + 1)
if times == 0
then return ()
else incValue var (times - 1)

printWhileLess var times = do
n <- atomically $ readTVar var
if n >= times
then return ()
else do
print n
printWhileLess var times

More STM

Other functions

Other data types

ThreadScope

Summary

Thanks

Brian McKenna

http://brianmckenna.org/

http://twitter.com/puffnfresh

Special thanks to

References

References