% Inline-Assembly-OCaml-Workshop-2015.tex -- slides for "Inline Assembly for OCaml" (OCaml Workshop, 2015)
% Preamble for the "Inline Assembly for OCaml" beamer slide deck.
\documentclass{beamer}

% array: supplies the >{...} column modifier used in the benchmark tables.
\usepackage{array}
% listings: supplies the lstlisting environment and \lstinline.
\usepackage{listings}
% Monospace font used for code, loaded at 80% scale.
\usepackage[scaled=0.8]{DejaVuSansMono}

% Defaults applied to every listing in the deck.
% (The key list previously began with a stray comma, i.e. an empty entry.)
\lstset{
  % Small monospace text.
  basicstyle=\footnotesize\ttfamily,
  % Do not draw visible-space glyphs inside string literals.
  showstringspaces=false,
  tabsize=2,
  % Inset listings on both sides by the paragraph indent.
  xleftmargin=\parindent,
  xrightmargin=\parindent,
  columns=flexible,
  % Horizontal rule above (t) and below (b) each listing.
  frame=tb
}

% Title metadata; the optional [...] argument of each command is its short form.
% NOTE(review): no \titlepage or \maketitle appears in any frame of this file,
% so this metadata is never typeset on a slide -- confirm that is intended.
\title[Inline Assembly]{Inline Assembly for OCaml}
\author[Brankov, Vladimir]{V.~Brankov\inst{1}}
\institute[Jane Street]
{
  \inst{1}
  Jane Street
}
\date[August 2015]{OCaml Workshop, 2015}
\begin{document}
% Slide 1: why inline assembly -- motivation, what it provides, where it helps.
\begin{frame}
  \frametitle{OCaml Inline Assembly}
  \begin{itemize}
    % Motivation.
    \item Motivation: Intel AVX Vector Instructions
      \begin{itemize}
        \item 256-bit Float Vector = 4x performance
      \end{itemize}
      \bigskip
    % What the feature provides.
    \item Embed assembly code in OCaml code
      \begin{itemize}
        \item Work with OCaml variables
        \item Compiler can perform optimizations
        \item Equivalent to built-in compiler primitives (no C call)
      \end{itemize}
      \bigskip
    % Where the speed-ups come from.
    \item Speed
      \begin{itemize}
        \item Vector instructions (float, int and string)
        \item Special instructions (Float.max)
        \item Language limits (arrays of C structs for HDF5)
      \end{itemize}
  \end{itemize}
\end{frame}
% Slide 2: side-by-side example of a C-stub external versus an "%asm" external.
% The listing holds, in order:
%   1. `float_min` declared as an external bound to the stub "float_min_stub";
%   2. `float_min` declared again with "%asm" and the template "minsd %0, %2";
%   3. an assembly sequence containing `call float_min_stub@PLT`
%      (annotated "external C call" in the listing);
%   4. an assembly sequence containing a single `minsd`
%      (annotated "inline assembly" in the listing).
% The frame is [fragile] because it contains a verbatim lstlisting environment.
% NOTE(review): the listing body is verbatim, so do not indent or re-wrap it;
% beamer's fragile mode is also documented as needing \end{frame} alone on its
% own line -- keep it unindented.
\begin{frame}[fragile]
\frametitle{OCaml Inline Assembly}
\begin{lstlisting}
external float_min : float -> float -> float
= "float_min_stub" "fmin" "float"
external float_min : float -> float -> float
= "%asm" "float_min_stub" "minsd %0, %2" "mx" "2" "=x" ""
movsd .L133(%rip), %xmm1
movsd .L104(%rip), %xmm0
call float_min_stub@PLT # external C call
movsd %xmm0, 24(%rsp)
movsd .L133(%rip), %xmm0
movsd .L104(%rip), %xmm1
minsd %xmm1, %xmm0 # inline assembly
movsd %xmm0, 16(%rsp)
\end{lstlisting}
\end{frame}
% Slide 3: where to get the implementation, the declaration syntax, and the
% compiler optimizations that apply to inline assembly.
\begin{frame}
\frametitle{OCaml Inline Assembly}
\begin{itemize}
\item Implementation: \lstinline[language=Bash]$opam switch 4.03.0+pr162$
\medskip
% The two \lstinline calls below together show one `external` declaration.
% It is split in two so the continuation can be indented by hand with `\ `
% control spaces. Inside the \lstinline arguments the characters _, % and $
% are backslash-escaped.
% NOTE(review): the escaping is presumably needed because this frame is not
% [fragile], so its body is tokenized before listings sees it -- confirm
% against the listings manual before reformatting these lines.
\item Borrows GCC syntax:\\
\ \ \ \lstinline[language={[Objective]Caml}]$
external floor\_asm : float -> float$ \\
\ \ \ \ \ \ \lstinline[language={[Objective]Caml}]$
= "\%asm" \ \ \ \ "floor\_stub" \ \ \ \ \ \ "roundsd \$1, \%0, \%1"
\ \ \ \ \ \ \ \ \ "mx" \ \ \ \ "=x"$
\medskip
\item OCaml compiler optimizations:
\begin{itemize}
\item Variables in registers or memory
\item Commutative variables
\item Boxing and reference elimination
\end{itemize}
\end{itemize}
\end{frame}
% Slide 4: measured speed-ups (simple and larger benchmarks) and the
% alternatives to inline assembly.
\begin{frame}
  \frametitle{OCaml Inline Assembly}
  \begin{itemize}
    \item Simple benchmarks:\\
      \medskip
      \begin{small}
        % Column 1: benchmark name followed by dotted leaders (\dotfill).
        % Column 2: speed-up; the >{\hspace{-6pt}} modifier shifts each cell
        % left by 6pt so it sits against the leaders.
        \begin{tabular}{l>{\hspace{-6pt}}l}
          float min \dotfill & 6x \\
          floor \dotfill & 5x \\
          +. *. \dotfill & 2.3x \\
          % Numeric range: en dash with no surrounding spaces.
          module Complex \dotfill & 1.3--12x \\
          HDF5 arrays of C structs \dotfill & 14x \\
        \end{tabular}
      \end{small}
      \medskip
    \item Larger benchmarks:\\
      \medskip
      \begin{small}
        % Same two-column layout as the table above.
        \begin{tabular}{l>{\hspace{-6pt}}l}
          Binomial Pricer \dotfill & 3.28x \\
          Correlation matrix \dotfill & 2.7x \\
        \end{tabular}
      \end{small}
      \medskip
    \item Alternatives:
      \begin{itemize}
        % TeX quote ligatures: a straight ASCII " does not typeset as an
        % opening quotation mark.
        \item ``noalloc'' externals
        \item Blocks of C code
      \end{itemize}
  \end{itemize}
\end{frame}
\end{document}