BASIC to LLVM IR compiler made from scratch. The generated code can be emulated with LLVM (using lli
), compiled to Assembly (using llc
) or compiled to a native binary (using clang
)
The original BASIC grammar is supported, with minor changes. In Wirth notation, the original grammar is given by:
1. Program = BStatement { BStatement } int “END” .
2. BStatement = int ( Assign | Read | Data | Print | Goto | If | For | Next | Dim | Def | Gosub | Return | Remark ) .
3. Assign = “LET” Var “=” Exp .
4. Var = letter digit | letter [ “(” Exp { “,” Exp } “)” ] .
5. Exp = { “+” | “-” } Eb { ( “+” | “-” | “*” | “/” | “” ) Eb } .
6. Eb = “(” Exp “)” | Num | Var | ( “FN” letter | Predef ) “(” Exp “)” .
7. Predef = “SIN” | “COS” | “TAN” | “ATN” | “EXP” | “ABS” | “LOG” | “SQR” | “INT” | “RND” .
8. Read = “READ” Var { “,” Var } .
9. Data = “DATA” Snum { “,” Snum } .
10. Print = “PRINT” [ Pitem { “,” Pitem } [ “,” ] ].
11. Pitem = Exp | ““” Character { Character } “”” [ Exp ] .
12. Goto = ( “GOTO” | “GO” “TO” ) int .
13. If = “IF” Exp ( “>=” | “>” | “<>” | “<” | “<=” | “=” ) Exp “THEN” int .
14. For = “FOR” letter [ digit ] “=” Exp “TO” Exp [ “STEP” Exp ] .
15. Next = “NEXT” letter [ digit ] .
16. Dim = “DIM” letter “(” int { “,” int } “)” { “,” letter “(” int { “,” int } “)” } .
17. Def = “DEF FN” letter “(” letter [ digit ] “)” “=” Exp .
18. Gosub = “GOSUB” int .
19. Return = “RETURN” .
20. Remark = “REM” { Character } .
21. Int = digit { digit } .
22. Num = ( Int [ “.” { digit } ] | “.” Int ) [ “E” [ “+” | “-” ] Int ] .
23. Snum = [ “+” | “-” ] Num .
24. Character = letter | digit | special .
This compiler has the following changes from the original grammar:
- Programs don't have to end with an END statement. Reaching the end of the program automatically ends it. Furthermore, the END statement may appear anywhere in the middle of the program if an early exit is desired.
- According to BASIC at 50:
Without a DIM statement, the default dimensions are 0 to 10 for each dimension.
In this compiler, all variables are double precision floating-point scalars, unless explicitly declared as a vector with the DIM statement. Accessing invalid dimensions (e.g. accessing a variable as a vector without an explicit DIM statement) will raise a compilation error. Out-of-bounds accesses give undefined behavior during runtime.
- Variables can have any number of dimensions. In the original BASIC, only vectors and matrices (1 and 2 dimensions) were supported.
After installing this package, run python -m basic_compiler.main
. The -h/--help
flag lists supported flags:
$ python -m basic_compiler.main -h
usage: main.py [-h] [--opt] [--lli] [--bin BIN] source
BASIC to LLVM IR compiler.
positional arguments:
source source file
optional arguments:
-h, --help show this help message and exit
--opt call optimizer on generated code
--lli run generated code with lli
--bin BIN call assembler and linker to output a binary
The following program plots a normal distribution:
10 REM Taken from https://www.dartmouth.edu/basicfifty/commands.html
100 REM PLOT A NORMAL DISTRIBUTION CURVE
120 DEF FNN(X) = EXP(-(X↑2/2))/SQR(2*3.14159265)
140 FOR X = -2 TO 2 STEP .1
150 LET Y = FNN(X)
160 LET Y = INT(100*Y)
170 FOR Z = 1 TO Y
180 PRINT " ",
190 NEXT Z
200 PRINT "*"
210 NEXT X
220 END
python -m basic_compiler.main normal_distribution.bas
outputs to normal_distribution.ll
:
source_filename = "normal_distribution.bas"
@.str1 = private unnamed_addr constant [2 x i8] c" \00", align 1
@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
@.str3 = private unnamed_addr constant [2 x i8] c"*\00", align 1
@.str4 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
@for_Z_end_12 = internal global double 0., align 8
@X = internal global double 0., align 8
@Y = internal global double 0., align 8
@Z = internal global double 0., align 8
define dso_local void @program(i8* %target_label) local_unnamed_addr #0 {
indirectbr i8* %target_label, [ label %label_10 ]
label_10:
; Taken from https://www.dartmouth.edu/basicfifty/commands.html
; PLOT A NORMAL DISTRIBUTION CURVE
store double -2.0, double* @X, align 8
br label %label_150
label_150:
%X_6 = load double, double* @X, align 8
%FNN_7 = tail call fast double @FNN(double %X_6) #0
store double %FNN_7, double* @Y, align 8
%Y_8 = load double, double* @Y, align 8
%fmul_9 = fmul fast double 100.0, %Y_8
%INT_10 = tail call fast double @llvm.rint.f64(double %fmul_9) #0
store double %INT_10, double* @Y, align 8
store double 1.0, double* @Z, align 8
%Y_11 = load double, double* @Y, align 8
store double %Y_11, double* @for_Z_end_12, align 8
br label %label_180
label_180:
tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i32 0, i32 0)) #0
%Z_13 = load double, double* @Z, align 8
%new_Z_14 = fadd fast double %Z_13, 1.0
store double %new_Z_14, double* @Z, align 8
%end_Z_15 = load double, double* @for_Z_end_12, align 8
%will_jump_16 = fcmp ole double %new_Z_14, %end_Z_15
br i1 %will_jump_16, label %label_180, label %for_exit_17
for_exit_17:
tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str4, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i32 0, i32 0)) #0
%X_18 = load double, double* @X, align 8
%new_X_19 = fadd fast double %X_18, 0.1
store double %new_X_19, double* @X, align 8
%will_jump_20 = fcmp ole double %new_X_19, 2.0
br i1 %will_jump_20, label %label_150, label %for_exit_21
for_exit_21:
tail call void @exit(i32 0) noreturn #0
unreachable
}
define dso_local i32 @main() local_unnamed_addr #1 {
tail call void @program(i8* blockaddress(@program, %label_10)) #0
ret i32 0
}
define dso_local double @FNN(double %arg) local_unnamed_addr #0 {
%pow_0 = tail call fast double @llvm.pow.f64(double %arg, double 2.0) #0
%fdiv_1 = fdiv fast double %pow_0, 2.0
%fdiv_1_neg = fsub fast double 0., %fdiv_1
%EXP_2 = tail call fast double @llvm.exp.f64(double %fdiv_1_neg) #0
%fmul_3 = fmul fast double 2.0, 3.14159265
%SQR_4 = tail call fast double @llvm.sqrt.f64(double %fmul_3) #0
%fdiv_5 = fdiv fast double %EXP_2, %SQR_4
ret double %fdiv_5
}
declare void @exit(i32) local_unnamed_addr noreturn #0
declare double @llvm.exp.f64(double) local_unnamed_addr #0
declare double @llvm.pow.f64(double, double) local_unnamed_addr #0
declare double @llvm.rint.f64(double) local_unnamed_addr #0
declare double @llvm.sqrt.f64(double) local_unnamed_addr #0
declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #0
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"BASIC to LLVM IR compiler (https://github.com/tiagoshibata/pcs3866-compilers)"}
This is unoptimized IR generated by the front-end. The --opt
flag calls clang
to further optimize the IR. The FNN
function can be seen with the correct operator priority. Using --opt
, it is re-written as:
define dso_local double @FNN(double %arg) local_unnamed_addr #2 {
%1 = fmul fast double %arg, %arg
%fdiv_1_neg = fmul fast double %1, -5.000000e-01
%EXP_2 = tail call fast double @llvm.exp.f64(double %fdiv_1_neg) #5
%fdiv_5 = fmul fast double %EXP_2, 0x3FD988453412DD64
ret double %fdiv_5
}
The pow
call was replaced by a fmul
instruction, followed by a multiplication by -0.5
. Then, exp
is called, and the division by SQR(2*3.14159265)
is replaced with a multiplication by a constant value.
https://sites.google.com/view/tiagoshibata-pcs3848/
Made for the PCS3848 - Compilers course.
BASIC at 50 - BASIC Commands and other online sources were used as references for details of each command.