Compare commits

...

46 Commits

Author SHA1 Message Date
r4
99489dfeb6 fix error message 2021-12-30 18:10:08 +01:00
r4
e7da8dfe38 make put all frees after set_err
I had the case that frees would invalidate memory set_err would later
use, so putting frees after set_err is probably good practice.
2021-12-30 18:07:59 +01:00
r4
f6b74f8f97 add generic arrays 2021-12-30 17:59:28 +01:00
r4
6f91a71306 fix memory leak + add function to convert to string 2021-12-29 22:15:56 +01:00
r4
4d5cd93354 implement basic RAII-ish array management 2021-12-29 21:42:43 +01:00
r4
7773cc6c14 change IR stream to linked list 2021-12-29 13:27:58 +01:00
r4
45feb3fe1d improve Windows detection 2021-12-28 18:03:52 +01:00
r4
ba8d2f0702 add nilptrs, more conversion functions and change how ptrs work
Pointers now no longer point to the Value struct (the internal wrapper for
values) but to the value itself.
2021-12-28 17:53:27 +01:00
r4
befce544e7 move type enum out of struct 2021-12-28 13:55:01 +01:00
r4
0d5313a063 fix invalid memory access 2021-12-28 13:39:59 +01:00
r4
22f71d7e56 add pointers, variable arguments and void functions 2021-12-28 13:39:12 +01:00
r4
a706ea6a3f disallow use of declared variable in declaration expression 2021-12-28 12:11:04 +01:00
r4
0b2741f73f allow newline-padding of one-line if and else bodies 2021-12-26 19:23:42 +01:00
r4
cda56d5b9c allow for one-line if and else bodies without {} 2021-12-26 19:18:52 +01:00
r4
46e7487cad remove expression parser code redundancy 2021-12-26 15:06:33 +01:00
r4
18d6e7b7df add 'not equal' operator 2021-12-26 12:19:54 +01:00
r4
1f47b5e16c fix float precision loss bug 2021-12-26 11:59:01 +01:00
r4
dfe1ac90e8 add getln() function and calculator example
Runtime-allocated strings currently always leak memory, I will
definitely have to fix that.
2021-12-26 11:36:52 +01:00
r4
d8b470f0eb rename print() to putln() 2021-12-26 10:55:31 +01:00
r4
cf93109f1e IR optimization
IR optimization currently very basic, in fact it probably doesn't even
improve performance measurably.
2021-12-25 23:01:18 +01:00
r4
850dafbbc9 move preprocessor args into CPPFLAGS 2021-12-25 22:32:16 +01:00
r4
803368a264 remove unnecessary include 2021-12-25 14:10:53 +01:00
r4
b4c369e1d9 add sleep() function and improve Windows support
I could only test Windows support by compiling with gcc MinGW-w64 and
running with Wine.
2021-12-25 14:06:20 +01:00
r4
dd67a1bf5d reorganize examples 2021-12-25 12:45:27 +01:00
r4
b58810e822 better error messages 2021-12-25 12:32:52 +01:00
r4
f02dae603d unify printing + add basic strings 2021-12-25 12:16:06 +01:00
r4
92c4c5c991 Merge branch 'master' of https://git.nobrain.org/r4/lang 2021-12-23 22:26:55 +01:00
r4
9bd4d4e0c4 support for builtin functions with 0 args 2021-12-23 22:25:41 +01:00
r4
bb75b78a36 Update 'README.md' 2021-12-23 22:05:43 +01:00
r4
ca232fbf6a add mandelbrot example 2021-12-23 21:44:59 +01:00
r4
6bdc4e3210 add logical or and logical and 2021-12-23 21:42:09 +01:00
r4
d67008cfbf add char literals 2021-12-23 21:26:53 +01:00
r4
a0842424ec print not in IR 2021-12-23 21:08:01 +01:00
r4
84785dc3cf add boolean logic 2021-12-23 21:06:49 +01:00
r4
d185396a1c rename arith to binary 2021-12-23 20:10:02 +01:00
r4
97e8e32ebc add if and else, and fix nested loops (hopefully) 2021-12-23 19:58:00 +01:00
r4
ef63742015 reduce mallocs when calling functions 2021-12-23 17:00:57 +01:00
r4
7ae9ddaee9 multi-line expressions 2021-12-23 16:51:10 +01:00
r4
298883939b rewrite expression parser with parentheses 2021-12-23 15:56:12 +01:00
r4
052e78bf2e add pi example 2021-12-22 17:40:54 +01:00
r4
d7860fdac0 add VM 2021-12-22 17:23:24 +01:00
r4
5dd15ce9f1 add builtin functions 2021-12-22 16:09:52 +01:00
r4
41a5dba208 replace malloc with xmalloc 2021-12-22 13:10:00 +01:00
r4
b80e5a9c4e formatting 2021-12-22 12:57:14 +01:00
r4
e7f4773cba make while loop parsing more elegant
It is now possible to parse parts of the program and save the IR for
later addition. This is currently used for while loops but will also make other
control flow elements easier to implement in the future.
2021-12-22 12:52:16 +01:00
r4
a8be4540b1 fix segfault
So basically, I used i++ on a linked list instead of i->next. As if that
wasn't bad enough on its own, it even managed to somehow still work in the 2
test scenarios I gave it. Regardless, it is now fixed.
2021-12-21 18:38:52 +01:00
21 changed files with 1987 additions and 353 deletions

View File

@@ -1,3 +1,6 @@
ifneq ($(OS),Windows_NT)
CPPFLAGS = -D_POSIX_C_SOURCE=200112L
endif
CFLAGS = -ggdb -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
#CFLAGS = -pg -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
#CFLAGS = -O3 -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
@@ -12,7 +15,7 @@ $(EXE): $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
%.o: %.c
$(CC) -c -o $@ $< $(CFLAGS)
$(CC) -c -o $@ $< $(CPPFLAGS) $(CFLAGS)
deps.mk: $(SOURCE) $(HEADERS)
@echo "# Automatically generated by $(CC) -MM." > $@

View File

@@ -1,3 +1 @@
# lang
Yet another useless programming language (a lot of stuff not yet implemented).

View File

@@ -0,0 +1,21 @@
put("Enter an operation (+, -, *, /): ")
op := getln()
if (!(op == "+" || op == "-" || op == "*" || op == "/")) {
put("Unknown operation: ")
putln(op)
} else {
put("1st number: ")
n1 := float(getln())
put("2nd number: ")
n2 := float(getln())
put("Result: ")
if (op == "+")
putln(n1 + n2)
else if (op == "-")
putln(n1 - n2)
else if (op == "*")
putln(n1 * n2)
else if (op == "/")
putln(n1 / n2)
}

View File

@@ -2,10 +2,11 @@ x := 1
y := 1
i := 60
while i + 1 {
while i {
z := x + y
y = x
x = z
//print(z)
putln(z)
i = i - 1
}

View File

@@ -0,0 +1,86 @@
width := 280
height := 100
iterations := 100
xmin := -2.0
xmax := 0.5
ymin := -1.0
ymax := 1.0
// Some further coordinates:
/*iterations := 1000
xmin := -0.9072945999
xmax := -0.8984310833
ymin := 0.2304178999
ymax := 0.2370858666*/
/*iterations := 100
xmin := -0.193596288
xmax := -0.119260320
ymin := 1.006960992
ymax := 1.062687264*/
/*iterations := 800
xmin := -0.1675326254
xmax := -0.1675148625
ymin := 1.0413005672
ymax := 1.0413138086*/
/*iterations := 918
xmin := -0.7506201104
xmax := -0.7503409687
ymin := 0.0170447020
ymax := 0.0172540583*/
/*iterations := 400
xmin := -0.7548484315
xmax := -0.7540548595
ymin := 0.0530077004
ymax := 0.0536039518*/
y := 0
while y < height {
c_im := (float(height - y) / float(height)) * (ymax - ymin) + ymin
x := 0
while x < width {
c_re := (float(x) / float(width)) * (xmax - xmin) + xmin
z_re := 0.0
z_im := 0.0
it := 0
loop := true
while it < iterations && loop {
/* z = z*z + c */
/* (a + bi)^2 = a^2 + 2abi - b^2 */
z_re_tmp := z_re * z_re - z_im * z_im + c_re
z_im = 2.0 * z_re * z_im + c_im
z_re = z_re_tmp
/* Break if the number shoots off to infinity. */
if z_re * z_re + z_im * z_im > 4.0 {
loop = false
}
it = it + 1
}
if it <= iterations / 5
put(' ')
else if it <= iterations / 5 * 2
put('.')
else if it <= iterations / 5 * 3
put(',')
else if it <= iterations / 5 * 4
put('*')
else if it < iterations
put('+')
else if it == iterations
put('#')
x = x + 1
}
put('\n')
y = y + 1
}

17
examples/pi.script Normal file
View File

@@ -0,0 +1,17 @@
sum := 0.0
k := 0
iterations := 100
while k < iterations {
k_f := float(k)
sum = sum + 1.0 / pow(16.0, k_f) *
(4.0 / (8.0 * k_f + 1.0) -
2.0 / (8.0 * k_f + 4.0) -
1.0 / (8.0 * k_f + 5.0) -
1.0 / (8.0 * k_f + 6.0))
k = k + 1
}
put("π ≈ ")
putln(sum)

251
ir.c
View File

@@ -10,100 +10,225 @@ const char *irinstr_str[IRInstrEnumSize] = {
[IRSub] = "sub",
[IRMul] = "mul",
[IRDiv] = "div",
[IRPrint] = "print",
[IREq] = "eq",
[IRNeq] = "neq",
[IRLt] = "lt",
[IRLe] = "le",
[IRNot] = "not",
[IRAnd] = "and",
[IROr] = "or",
[IRJmp] = "jmp",
[IRJnz] = "jnz",
[IRCallInternal] = "calli",
[IRAddrOf] = "addrof",
[IRArrMake] = "mkarr",
};
#define IRTOKS_INIT_CAP 4096
#define IRLIST_INIT_CAP_LONG 4096
#define IRLIST_INIT_CAP_SHORT 16
void irtoks_init(IRToks *v) {
v->toks = malloc(sizeof(IRTok) * IRTOKS_INIT_CAP);
static void irlist_init_with_cap(IRList *v, size_t cap);
static IRItem *irlist_new_item(IRList *v);
static void irlist_init_with_cap(IRList *v, size_t cap) {
v->begin = NULL;
v->end = NULL;
v->p = pool_new(sizeof(IRItem) * cap);
v->index = NULL;
v->len = 0;
v->cap = IRTOKS_INIT_CAP;
}
void irtoks_term(IRToks *v) {
for (size_t i = 0; i < v->len; i++) {
if (v->toks[i].instr == IRPrint) {
for (IRArgs *a = v->toks[i].Print.args; a != NULL;) {
IRArgs *next = a->next;
free(a);
a = next;
}
}
}
free(v->toks);
/* Takes storage for one list node from the list's pool and returns it with
 * its next pointer cleared. The node's tok member is not assigned here; the
 * caller is expected to fill it in. */
static IRItem *irlist_new_item(IRList *v) {
	IRItem *node = pool_alloc(v->p, sizeof *node);
	node->next = NULL;
	return node;
}
void irtoks_app(IRToks *v, IRTok t) {
if (v->len+1 > v->cap)
v->toks = realloc(v->toks, sizeof(IRTok) * (v->cap *= 2));
v->toks[v->len++] = t;
/* Initializes v as an empty IR list, passing IRLIST_INIT_CAP_LONG as the
 * capacity to irlist_init_with_cap(). */
void irlist_init_long(IRList *v) {
irlist_init_with_cap(v, IRLIST_INIT_CAP_LONG);
}
static void print_val(const Value *v);
static void print_irparam(const IRParam *p);
static void print_val(const Value *v) {
switch (v->type.kind) {
case TypeFloat:
printf("%f", v->Float);
break;
case TypeInt:
printf("%zd", v->Int);
break;
default:
printf("(unknown type)");
break;
}
/* Initializes v as an empty IR list, passing IRLIST_INIT_CAP_SHORT as the
 * capacity to irlist_init_with_cap(). */
void irlist_init_short(IRList *v) {
irlist_init_with_cap(v, IRLIST_INIT_CAP_SHORT);
}
static void print_irparam(const IRParam *p) {
if (p->kind == IRParamLiteral) {
print_val(&p->Literal);
} else if (p->kind == IRParamAddr) {
printf("%%%zd", p->Addr);
}
static void free_irparam(IRParam *v, bool purge);
/* if purge is set, even statically allocated literals are freed */
static void free_irparam(IRParam *v, bool purge) {
	/* Only literal parameters carry a value; other kinds need no cleanup. */
	if (v->kind != IRParamLiteral)
		return;
	free_value(&v->Literal, purge);
}
void print_ir(IRToks *v) {
for (size_t i = 0; i < v->len; i++) {
printf("%04zx ", i);
printf("%s", irinstr_str[v->toks[i].instr]);
switch (v->toks[i].instr) {
void irlist_term(IRList *v) {
for (IRItem *i = v->begin; i; i = i->next) {
switch (i->tok.instr) {
case IRSet:
case IRNeg:
printf(" %%%zu ", v->toks[i].Unary.addr);
print_irparam(&v->toks[i].Unary.val);
case IRNot:
case IRAddrOf:
free_irparam(&i->tok.Unary.val, true);
break;
case IRAdd:
case IRSub:
case IRDiv:
case IRMul:
printf(" %%%zu ", v->toks[i].Arith.addr);
print_irparam(&v->toks[i].Arith.lhs);
printf(" ");
print_irparam(&v->toks[i].Arith.rhs);
break;
case IRPrint:
for (IRArgs *a = v->toks[i].Print.args; a != NULL; a = a->next) {
printf(" ");
print_irparam(&a->param);
}
case IREq:
case IRNeq:
case IRLt:
case IRLe:
case IRAnd:
case IROr:
free_irparam(&i->tok.Binary.lhs, true);
free_irparam(&i->tok.Binary.rhs, true);
break;
case IRJmp:
printf(" %zx", v->toks[i].Jmp.iaddr);
break;
case IRJnz:
free_irparam(&i->tok.CJmp.condition, true);
break;
case IRCallInternal:
for (size_t j = 0; j < i->tok.CallI.n_args; j++)
free_irparam(&i->tok.CallI.args[j], true);
free(i->tok.CallI.args);
break;
case IRArrMake:
for (size_t j = 0; j < i->tok.ArrMake.len; j++)
free_irparam(&i->tok.ArrMake.vals[j], true);
free(i->tok.ArrMake.vals);
break;
default:
ASSERT_UNREACHED();
}
}
pool_term(v->p);
}
/* Appends a copy of instruction t to the end of list v. */
void irlist_app(IRList *v, IRTok t) {
	/* The address index no longer matches the list once it grows. */
	v->index = NULL;

	IRItem *node = irlist_new_item(v);
	node->tok = t;

	if (v->begin || v->end) {
		/* Non-empty list: hook the node behind the current tail. */
		v->end->next = node;
		v->end = node;
	} else {
		/* Empty list: the node is both head and tail. */
		v->begin = node;
		v->end = node;
	}
	v->len++;
}
/* Appends every instruction of `other` to `v` (by value, via irlist_app())
 * and then releases other's pool. Jump targets stored in other's IRJmp and
 * IRJnz instructions are shifted by jmp_offset before being copied.
 * After the call only other's pool has been terminated; resources referenced
 * from its instructions (e.g. argument arrays) now belong to v.
 * NOTE(review): jmp_offset is v->len-1, which wraps around when v is empty;
 * confirm against the parser which base the jump addresses in `other` use. */
void irlist_eat_irlist(IRList *v, IRList *other) {
v->index = NULL; /* invalidate index */
size_t jmp_offset = v->len-1;
for (IRItem *i = other->begin; i; i = i->next) {
/* correct for changed jump addresses */
if (i->tok.instr == IRJmp)
i->tok.Jmp.iaddr += jmp_offset;
else if (i->tok.instr == IRJnz)
i->tok.CJmp.iaddr += jmp_offset;
irlist_app(v, i->tok);
}
/* We're not calling irlist_term() because we don't want associated items
* (for example function arguments) to get deallocated as well. */
pool_term(other->p);
}
/* Builds v->index, an array that maps an instruction address (position in
 * the list) to its list item. Does nothing if an index already exists;
 * irlist_app() and irlist_eat_irlist() reset v->index to NULL whenever the
 * list changes. The array is taken from the list's pool. */
void irlist_update_index(IRList *v) {
	if (v->index)
		return;
	/* Size the array by its element type (IRItem *), not by size_t, so the
	 * allocation stays correct where the two differ in size. */
	v->index = pool_alloc(v->p, sizeof *v->index * v->len);
	size_t n = 0;
	for (IRItem *i = v->begin; i; i = i->next)
		v->index[n++] = i;
}
static void print_irparam(const IRParam *p);
/* Prints one IR parameter to stdout: an address as '%' followed by the
 * number, a literal via print_value(). Other kinds print nothing. */
static void print_irparam(const IRParam *p) {
	if (p->kind == IRParamAddr) {
		printf("%%%zd", p->Addr);
		return;
	}
	if (p->kind == IRParamLiteral)
		print_value(&p->Literal, false);
}
void print_ir(IRList *v, const BuiltinFunc *builtin_funcs) {
size_t iaddr = 0;
for (IRItem *i = v->begin; i; i = i->next, iaddr++) {
printf("%04zx ", iaddr);
printf("%s", irinstr_str[i->tok.instr]);
switch (i->tok.instr) {
case IRSet:
case IRNeg:
case IRNot:
case IRAddrOf:
printf(" %%%zx ", i->tok.Unary.addr);
print_irparam(&i->tok.Unary.val);
break;
case IRAdd:
case IRSub:
case IRDiv:
case IRMul:
case IREq:
case IRNeq:
case IRLt:
case IRLe:
case IRAnd:
case IROr:
printf(" %%%zx ", i->tok.Binary.addr);
print_irparam(&i->tok.Binary.lhs);
printf(" ");
print_irparam(&i->tok.Binary.rhs);
break;
case IRJmp:
printf(" %zx", i->tok.Jmp.iaddr);
break;
case IRJnz:
printf(" ");
print_irparam(&v->toks[i].CJmp.condition);
printf(" %zx", v->toks[i].CJmp.iaddr);
print_irparam(&i->tok.CJmp.condition);
printf(" %zx", i->tok.CJmp.iaddr);
break;
default:
case IRCallInternal: {
const BuiltinFunc *f = &builtin_funcs[i->tok.CallI.fid];
if (f->returns)
printf(" %%%zx", i->tok.CallI.ret_addr);
printf(" %s", f->name);
for (size_t j = 0; j < i->tok.CallI.n_args; j++) {
printf(" ");
print_irparam(&i->tok.CallI.args[j]);
}
break;
}
printf(" ; %zu:%zu", v->toks[i].ln, v->toks[i].col);
case IRArrMake: {
printf(" %%%zx", i->tok.ArrMake.arr_addr);
for (size_t j = 0; j < i->tok.ArrMake.len; j++) {
printf(" ");
print_irparam(&i->tok.ArrMake.vals[j]);
}
break;
}
default: ASSERT_UNREACHED();
}
printf(" ; %zu:%zu", i->tok.ln, i->tok.col);
printf("\n");
}
}
/* Performs simple optimizations on the IR in place.
 * Currently the only optimization is jump threading: an unconditional jump
 * whose target is itself an unconditional jump is redirected to the final
 * destination of that chain. */
void optimize_ir(IRList *v) {
	irlist_update_index(v);
	for (IRItem *i = v->begin; i; i = i->next) {
		switch (i->tok.instr) {
		case IRJmp: {
			/* resolve jump chains (esp. produced by if-else-if... statements) */
			size_t ja = i->tok.Jmp.iaddr;
			size_t hops = 0;
			/* An acyclic chain can visit fewer than v->len jumps, so
			 * reaching v->len hops means the chain loops back on itself. */
			while (hops < v->len && ja < v->len && v->index[ja]->tok.instr == IRJmp) {
				ja = v->index[ja]->tok.Jmp.iaddr;
				hops++;
			}
			/* Leave cyclic jumps untouched instead of spinning forever. */
			if (hops < v->len)
				i->tok.Jmp.iaddr = ja;
			break;
		}
		default: break;
		}
	}
}

89
ir.h
View File

@@ -3,6 +3,36 @@
#include "tok.h"
/* Description of a function implemented in C that scripts can call.
 * `kind` tells which member of the outer union is in use (FixedArgs or
 * VarArgs); `returns` tells which member of the inner union holds the
 * function pointer (WithRet if set, NoRet otherwise). */
typedef struct BuiltinFunc {
enum {
FuncFixedArgs,
FuncVarArgs,
} kind;
bool returns : 1; /* the function yields a Value */
bool side_effects : 1; /* NOTE(review): presumably consulted by optimizations; consumer not visible here */
char *name; /* name under which the function is called */
size_t fid; /* function ID, assigned automatically */
union {
/* function taking exactly n_args arguments */
struct {
size_t n_args;
union {
struct { Value (*func)(Value *args); } WithRet;
struct { void (*func)(Value *args); } NoRet;
};
} FixedArgs;
/* function taking at least min_args arguments; the callee additionally
* receives the number of extra arguments */
struct {
size_t min_args;
union {
struct { Value (*func)(size_t extra_args, Value *args); } WithRet;
struct { void (*func)(size_t extra_args, Value *args); } NoRet;
};
} VarArgs;
};
} BuiltinFunc;
enum IRInstr {
IRSet,
IRNeg,
@@ -10,9 +40,18 @@ enum IRInstr {
IRSub,
IRMul,
IRDiv,
IRPrint,
IREq,
IRNeq,
IRLt,
IRLe,
IRNot,
IRAnd,
IROr,
IRJmp,
IRJnz,
IRCallInternal,
IRAddrOf,
IRArrMake,
IRInstrEnumSize,
};
typedef enum IRInstr IRInstr;
@@ -51,12 +90,7 @@ typedef struct IRTok {
struct {
size_t addr;
IRParam lhs, rhs;
} Arith;
struct {
IRArgs *args;
size_t args_size;
} Print;
} Binary;
struct {
size_t iaddr;
@@ -66,18 +100,43 @@ typedef struct IRTok {
size_t iaddr;
IRParam condition;
} CJmp;
struct {
size_t ret_addr;
size_t fid;
size_t n_args;
IRParam *args;
} CallI;
struct {
size_t arr_addr;
size_t len, cap;
IRParam *vals;
} ArrMake;
};
} IRTok;
typedef struct IRToks {
size_t len, cap;
IRTok *toks;
} IRToks;
typedef struct IRItem {
struct IRItem *next;
IRTok tok;
} IRItem;
void irtoks_init(IRToks *v);
void irtoks_term(IRToks *v);
void irtoks_app(IRToks *v, IRTok t);
typedef struct IRList {
IRItem *begin, *end;
Pool *p;
IRItem **index; /* index to pointer, irlist_update_index() must be called before use */
size_t len;
} IRList;
void print_ir(IRToks *v);
void irlist_init_long(IRList *v);
void irlist_init_short(IRList *v);
void irlist_term(IRList *v);
void irlist_app(IRList *v, IRTok t);
void irlist_eat_irlist(IRList *v, IRList *other);
void irlist_update_index(IRList *v); /* should be used very conservatively */
void print_ir(IRList *v, const BuiltinFunc *builtin_funcs);
void optimize_ir(IRList *v);
#endif /* IR_H */

160
lex.c
View File

@@ -2,6 +2,8 @@
#include "util.h"
#define TAB_WIDTH 4
typedef struct Pos {
size_t ln, col; /* current position */
size_t m_ln, m_col; /* marked position */
@@ -11,12 +13,15 @@ static void consume(Pos *p, char c);
static void emit(TokList *toks, const Pos *p, Tok t);
static void mark(Pos *p);
static void mark_err(const Pos *p);
static char get_esc_char(char c);
static void consume(Pos *p, char c) {
if (c == '\n') {
p->ln++;
p->col = 1;
} else
} else if (c == '\t')
p->col += TAB_WIDTH;
else
p->col++;
}
@@ -36,6 +41,23 @@ static void mark_err(const Pos *p) {
err_col = p->m_col;
}
/* Translates the character following a backslash in a char or string
 * literal into the character it stands for. Returns 0 if c does not name a
 * known escape sequence. */
static char get_esc_char(char c) {
	static const struct {
		char code;  /* character written after the backslash */
		char value; /* character it is replaced with */
	} escapes[] = {
		{ 'a',  '\a'   },
		{ 'b',  '\b'   },
		{ 'e',  '\033' },
		{ 'f',  '\f'   },
		{ 'n',  '\n'   },
		{ 'r',  '\r'   },
		{ 't',  '\t'   },
		{ 'v',  '\v'   },
		{ '\\', '\\'   },
		{ '\'', '\''   },
		{ '"',  '\"'   },
	};
	for (size_t i = 0; i < sizeof escapes / sizeof escapes[0]; i++) {
		if (escapes[i].code == c)
			return escapes[i].value;
	}
	return 0;
}
TokList lex(const char *s) {
TokList toks;
toklist_init(&toks);
@@ -54,8 +76,14 @@ TokList lex(const char *s) {
}
if (streq_0_n("if", start, i))
emit(&toks, &pos, (Tok){ .kind = TokIf });
else if (streq_0_n("else", start, i))
emit(&toks, &pos, (Tok){ .kind = TokElse });
else if (streq_0_n("while", start, i))
emit(&toks, &pos, (Tok){ .kind = TokWhile });
else if (streq_0_n("true", start, i))
emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = { .type = TypeBool, .Bool = true }});
else if (streq_0_n("false", start, i))
emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = { .type = TypeBool, .Bool = false }});
else {
emit(&toks, &pos, (Tok){
.kind = TokIdent,
@@ -118,9 +146,7 @@ TokList lex(const char *s) {
emit(&toks, &pos, (Tok){
.kind = TokVal,
.Val = {
.type = {
.kind = TypeFloat,
},
.type = TypeFloat,
.Float = num,
},
});
@@ -136,9 +162,7 @@ TokList lex(const char *s) {
emit(&toks, &pos, (Tok){
.kind = TokVal,
.Val = {
.type = {
.kind = TypeInt,
},
.type = TypeInt,
.Int = num,
},
});
@@ -160,20 +184,71 @@ TokList lex(const char *s) {
break;
case ':':
consume(&pos, *(s++));
if (s[0] == '=') {
if (s[0] == '=')
emit(&toks, &pos, (Tok){ .kind = TokDeclare });
} else {
else {
set_err("Expected ':='");
return toks;
}
break;
case '=':
consume(&pos, *(s++));
if (s[0] == '=')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpEq });
else {
emit(&toks, &pos, (Tok){ .kind = TokAssign });
continue;
}
break;
case '<':
consume(&pos, *(s++));
if (s[0] == '=')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpLe });
else {
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpLt });
continue;
}
break;
case '>':
consume(&pos, *(s++));
if (s[0] == '=')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpGe });
else {
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpGt });
continue;
}
break;
case '&':
consume(&pos, *(s++));
if (s[0] == '&')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpAnd });
else {
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpAddrOf });
continue;
}
break;
case '!':
consume(&pos, *(s++));
if (s[0] == '=')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpNeq });
else {
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpNot });
continue;
}
break;
case '|':
consume(&pos, *(s++));
if (s[0] == '|')
emit(&toks, &pos, (Tok){ .kind = TokOp, .Op = OpOr });
else
continue;
break;
case '{':
case '}':
case '(':
case ')':
case '[':
case ']':
case ',':
case '+':
case '-':
@@ -217,6 +292,73 @@ TokList lex(const char *s) {
});
}
continue;
case '\'': {
consume(&pos, *(s++));
char c = s[0];
if (c == '\\') {
consume(&pos, *(s++));
c = get_esc_char(s[0]);
if (!c) {
set_err("Unrecognized escape sequence: '\\%c'", s[0]);
return toks;
}
}
consume(&pos, *(s++));
if (s[0] != '\'') {
set_err("Unclosed char literal");
return toks;
}
emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = { .type = TypeChar, .Char = c }});
break;
}
case '"': {
consume(&pos, *(s++));
const char *start = s;
Pos start_pos = pos;
size_t size = 0;
/* count the string size before allocating */
while (s[0] != '"') {
if (!s[0]) {
set_err("Unexpected EOF in string literal");
return toks;
} else if (s[0] == '\\')
consume(&pos, *(s++));
consume(&pos, *(s++));
size++;
}
/* go through the actual string */
s = start;
pos = start_pos;
char *str = xmalloc(size);
for (size_t i = 0; i < size; i++) {
char c = s[0];
if (c == '\\') {
consume(&pos, *(s++));
c = get_esc_char(s[0]);
if (!c) {
set_err("Unrecognized escape sequence: '\\%c'", s[0]);
free(str);
return toks;
}
}
consume(&pos, *(s++));
str[i] = c;
}
emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = {
.type = TypeArr,
.Arr = {
.is_string = true,
.dynamically_allocated = false,
.type = TypeChar,
.vals = str,
.len = size,
.cap = size,
},
},});
break;
}
default:
set_err("Unrecognized character: '%c'", s[0]);
return toks;

208
main.c
View File

@@ -8,7 +8,9 @@
#include "ir.h"
#include "lex.h"
#include "parse.h"
#include "runtime.h"
#include "util.h"
#include "vm.h"
static void usage(const char *prgname);
static void die(const char *fmt, ...);
@@ -32,6 +34,181 @@ static void die(const char *fmt, ...) {
exit(1);
}
/* Builtin put(): prints each of the extra_args values in args to stdout,
 * separated by single spaces and without a trailing newline.
 * Prints nothing when called without arguments (the function is registered
 * with min_args = 0, so args may hold no elements at all). */
static void fn_put(size_t extra_args, Value *args) {
	for (size_t i = 0; i < extra_args; i++) {
		if (i > 0)
			printf(" ");
		print_value(&args[i], true);
	}
}
/* Builtin putln(): same output as fn_put(), followed by a newline. */
static void fn_putln(size_t extra_args, Value *args) {
	fn_put(extra_args, args);
	putchar('\n');
}
/* Builtin int(): converts args[0] to an integer value.
 * void yields 0; float, bool and char are cast; strings are parsed as
 * base-10 numbers. On a string that does not parse, an error is set via
 * set_err() and a zeroed Value is returned. */
static Value fn_int(Value *args) {
	Value *arg = &args[0];
	Value ret = { .type = TypeInt, .Int = 0 };

	switch (arg->type) {
		case TypeVoid:
			break;
		case TypeFloat:
			ret.Int = (ssize_t)arg->Float;
			break;
		case TypeInt:
			ret.Int = arg->Int;
			break;
		case TypeBool:
			ret.Int = (ssize_t)arg->Bool;
			break;
		case TypeChar:
			ret.Int = (ssize_t)arg->Char;
			break;
		case TypeArr: {
			/* Only character strings can be converted. */
			if (!(arg->Arr.is_string && arg->Arr.type == TypeChar)) {
				ASSERT_UNREACHED();
				break;
			}
			ssize_t endpos;
			ret.Int = stoimax((char*)arg->Arr.vals, arg->Arr.len, 10, &endpos);
			/* Any endpos other than -1 is treated as a failed conversion. */
			if (endpos != -1) {
				set_err("Error converting from string to int");
				return (Value){0};
			}
			break;
		}
		default:
			ASSERT_UNREACHED();
	}
	return ret;
}
/* Builtin float(): converts args[0] to a floating-point value.
 * void yields 0.0; int, bool and char are cast; strings are parsed as
 * numbers. On a string that does not parse, an error is set via set_err()
 * and a zeroed Value is returned. */
static Value fn_float(Value *args) {
	Value *arg = &args[0];
	Value ret = { .type = TypeFloat, .Float = 0.0 };

	switch (arg->type) {
		case TypeVoid:
			break;
		case TypeFloat:
			ret.Float = arg->Float;
			break;
		case TypeInt:
			ret.Float = (double)arg->Int;
			break;
		case TypeBool:
			ret.Float = (double)arg->Bool;
			break;
		case TypeChar:
			ret.Float = (double)arg->Char;
			break;
		case TypeArr: {
			/* Only character strings can be converted. */
			if (!(arg->Arr.is_string && arg->Arr.type == TypeChar)) {
				ASSERT_UNREACHED();
				break;
			}
			ssize_t endpos;
			ret.Float = stod((char*)arg->Arr.vals, arg->Arr.len, &endpos);
			/* Any endpos other than -1 is treated as a failed conversion. */
			if (endpos != -1) {
				set_err("Error converting from string to float");
				return (Value){0};
			}
			break;
		}
		default:
			ASSERT_UNREACHED();
	}
	return ret;
}
/* Builtin bool(): yields the result of is_nonzero() on args[0] as a bool
 * value. */
static Value fn_bool(Value *args) {
	Value ret = { .type = TypeBool };
	ret.Bool = is_nonzero(&args[0]);
	return ret;
}
/* Builtin char(): converts args[0] to a character value.
 * void yields the NUL character; float, int and bool are cast to char;
 * a char is passed through unchanged. */
static Value fn_char(Value *args) {
	Value ret = {
		.type = TypeChar,
		.Char = 0, /* initialize the member that matches .type */
	};
	switch (args[0].type) {
		case TypeVoid:  break;
		case TypeFloat: ret.Char = (char)args[0].Float; break;
		case TypeInt:   ret.Char = (char)args[0].Int; break;
		case TypeBool:  ret.Char = (char)args[0].Bool; break;
		case TypeChar:  ret.Char = args[0].Char; break;
		default: ASSERT_UNREACHED();
	}
	return ret;
}
/* Builtin ptr(): takes no arguments and yields a void pointer value whose
 * target is NULL. */
static Value fn_ptr(Value *args) {
	(void)args;
	Value nil = { .type = TypePtr };
	nil.Ptr.type = TypeVoid;
	nil.Ptr.val = NULL;
	return nil;
}
/* Builtin string(): renders args[0] (void, float, int, bool or char) as a
 * newly allocated character array and returns it as a string value marked
 * as dynamically allocated. The reported length never exceeds the
 * allocated capacity. */
static Value fn_string(Value *args) {
	size_t cap = 64;
	char *res = xmalloc(cap);
	int n = 0; /* characters written, not counting the terminating NUL */
	switch (args[0].type) {
		case TypeVoid:
			n = snprintf(res, cap, "(void)");
			break;
		case TypeFloat:
			n = snprintf(res, cap, "%f", args[0].Float);
			/* "%f" of a large double needs several hundred characters.
			 * snprintf then reports the full length while writing only
			 * cap-1 characters, so grow the buffer and format again. */
			if (n >= 0 && (size_t)n >= cap) {
				cap = (size_t)n + 1;
				res = xrealloc(res, cap);
				n = snprintf(res, cap, "%f", args[0].Float);
			}
			break;
		case TypeInt:
			n = snprintf(res, cap, "%zd", args[0].Int);
			break;
		case TypeBool:
			n = snprintf(res, cap, "%s", args[0].Bool ? "true" : "false");
			break;
		case TypeChar:
			res[0] = args[0].Char;
			n = 1;
			break;
		default: ASSERT_UNREACHED();
	}
	/* Treat a formatting error as an empty string and never report more
	 * characters than the buffer can hold. */
	size_t len = n < 0 ? 0 : (size_t)n;
	if (len > cap)
		len = cap;
	return (Value){
		.type = TypeArr,
		.Arr = {
			.is_string = true,
			.dynamically_allocated = true,
			.type = TypeChar,
			.vals = res,
			.len = len,
			.cap = cap,
		},
	};
}
/* Builtin pow(): raises args[0] to the power args[1]. Both arguments must
 * be floats; otherwise an error is set via set_err() and a zeroed Value is
 * returned. */
static Value fn_pow(Value *args) {
	if (args[0].type != TypeFloat || args[1].type != TypeFloat) {
		set_err("pow() requires arguments of type float");
		return (Value){0};
	}
	Value ret = { .type = TypeFloat };
	ret.Float = pow(args[0].Float, args[1].Float);
	return ret;
}
/* Builtin sleep(): passes args[0] to sleep_secs() if it is a float that
 * compares >= 0.0; any other argument sets an error via set_err(). */
static void fn_sleep(Value *args) {
	if (args[0].type == TypeFloat && args[0].Float >= 0.0) {
		sleep_secs(args[0].Float);
		return;
	}
	set_err("sleep() requires a positive float");
}
/* Builtin getln(): reads characters from stdin up to (not including) the
 * next newline or end of input and returns them as a string value marked
 * as dynamically allocated. The buffer starts at 64 bytes and doubles
 * whenever it is full. */
static Value fn_getln(Value *args) {
	(void)args;
	size_t cap = 64;
	size_t len = 0;
	char *line = xmalloc(cap);
	int c;
	while ((c = fgetc(stdin)) != EOF && c != '\n') {
		if (len >= cap) {
			cap *= 2;
			line = xrealloc(line, cap);
		}
		line[len++] = c;
	}
	return (Value){
		.type = TypeArr,
		.Arr = {
			.is_string = true,
			.dynamically_allocated = true,
			.type = TypeChar,
			.vals = line,
			.len = len,
			.cap = cap,
		},
	};
}
int main(int argc, const char **argv) {
/* parse arguments */
size_t nargs = argc - 1;
@@ -88,17 +265,38 @@ int main(int argc, const char **argv) {
if (opt_emit_tokens)
print_toks(&tokens);
/* parse tokens into IR code */
IRToks ir = parse(&tokens);
BuiltinFunc funcs[] = {
{ .name = "put", .kind = FuncVarArgs, .returns = false, .side_effects = true, .VarArgs = { .min_args = 0, .NoRet.func = fn_put, }},
{ .name = "putln", .kind = FuncVarArgs, .returns = false, .side_effects = true, .VarArgs = { .min_args = 0, .NoRet.func = fn_putln, }},
{ .name = "int", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 1, .WithRet.func = fn_int, }},
{ .name = "float", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 1, .WithRet.func = fn_float, }},
{ .name = "bool", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 1, .WithRet.func = fn_bool, }},
{ .name = "char", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 1, .WithRet.func = fn_char, }},
{ .name = "ptr", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 0, .WithRet.func = fn_ptr, }},
{ .name = "string", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 1, .WithRet.func = fn_string, }},
{ .name = "pow", .kind = FuncFixedArgs, .returns = true, .side_effects = false, .FixedArgs = { .n_args = 2, .WithRet.func = fn_pow, }},
{ .name = "sleep", .kind = FuncFixedArgs, .returns = false, .side_effects = true, .FixedArgs = { .n_args = 1, .NoRet.func = fn_sleep, }},
{ .name = "getln", .kind = FuncFixedArgs, .returns = true, .side_effects = true, .FixedArgs = { .n_args = 0, .WithRet.func = fn_getln, }},
};
IRList ir = parse(&tokens, funcs, sizeof(funcs) / sizeof(funcs[0]));
if (err) {
irtoks_term(&ir);
irlist_term(&ir);
toklist_term(&tokens);
fprintf(stderr, C_IRED "Parser error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf);
return 1;
}
toklist_term(&tokens);
optimize_ir(&ir);
if (opt_emit_ir)
print_ir(&ir);
print_ir(&ir, funcs);
/* run the IR */
/* TODO... */
irtoks_term(&ir);
if (!opt_dry) {
run(&ir, funcs);
if (err) {
irlist_term(&ir);
fprintf(stderr, C_IRED "Runtime error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf);
return 1;
}
}
irlist_term(&ir);
}

2
map.c
View File

@@ -14,7 +14,7 @@ static void init_with_cap(Map *m, size_t val_size, size_t cap) {
m->len = 0;
m->cap = cap;
m->val_size = val_size;
void *data = malloc(sizeof(MapSlot) * cap + val_size * cap);
void *data = xmalloc(sizeof(MapSlot) * cap + val_size * cap);
m->slots = data;
m->vals = m->slots + cap;
for (size_t i = 0; i < cap; i++) {

780
parse.c
View File

@@ -5,11 +5,6 @@
#include "map.h"
#include "runtime.h"
typedef struct State {
TokList *toks;
IRToks *ir;
} State;
typedef struct Scope {
struct Scope *parent;
size_t mem_addr;
@@ -17,32 +12,68 @@ typedef struct Scope {
Map ident_addrs;
} Scope;
typedef struct ExprMode {
bool ignore_newln;
typedef struct ExprRet {
enum {
ExprModeJustCollapse, /* should leave either a literal or an own address as result */
ExprModeStorageAddr, /* should use the supplied storage address in any case; should leave no token behind */
ExprRetVal,
ExprRetIdent,
ExprRetLastInstr,
} kind;
union {
size_t StorageAddr;
IRTok LastInstr;
};
} ExprMode;
} ExprRet;
static void mark_err(const Tok *t);
static void set_irtok_dest_addr(IRTok *t, size_t addr);
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos);
static IRParam tok_to_irparam(Scope *sc, Tok *t);
static Scope make_scope(Scope *parent, bool with_idents);
static void term_scope(Scope *sc);
static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode);
static void stmt(State *s, Scope *sc, TokListItem *t);
static bool expr_flush_ir_and_maybe_return(IRList *out_ir, TokList *toks, IRTok instr, TokListItem *expr_start, Scope *expr_scope, TokListItem *t, ExprRet *out_ret);
static void make_value_statically_allocated(Value *v);
static ExprRet expr(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t);
static void expr_into_addr(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, size_t addr);
static IRParam expr_into_irparam(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t);
static void skip_newlns(TokList *toks, TokListItem *from);
static void stmt(IRList *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t);
/* Copies the line and column stored in token t into err_ln and err_col,
 * the position that is reported alongside an error message. */
static void mark_err(const Tok *t) {
err_ln = t->ln;
err_col = t->col;
}
/* Stores addr as the destination address of instruction t. Which member
 * holds the destination depends on the instruction; instructions without a
 * destination (e.g. jumps) are not expected here. */
static void set_irtok_dest_addr(IRTok *t, size_t addr) {
	size_t *dest = NULL;

	switch (t->instr) {
		/* unary instructions */
		case IRSet:
		case IRNeg:
		case IRNot:
		case IRAddrOf:
			dest = &t->Unary.addr;
			break;
		/* binary instructions */
		case IRAdd:
		case IRSub:
		case IRMul:
		case IRDiv:
		case IREq:
		case IRNeq:
		case IRLt:
		case IRLe:
		case IRAnd:
		case IROr:
			dest = &t->Binary.addr;
			break;
		case IRCallInternal:
			dest = &t->CallI.ret_addr;
			break;
		case IRArrMake:
			dest = &t->ArrMake.arr_addr;
			break;
		default:
			ASSERT_UNREACHED();
	}
	if (dest)
		*dest = addr;
}
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos) {
size_t addr;
bool exists = false;
@@ -96,8 +127,67 @@ static void term_scope(Scope *sc) {
map_term(&sc->ident_addrs);
}
static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
/* A simplified example of how the operator precedence parsing works:
/* If ir_tok is the underlying expr() call's last evaluation, this function
* deletes t from toks, sets *out_ret and tells the caller it can return
* *out_ret by returning true.
*
* If ir_tok is not the expression's last instruction, ir_tok is written to
* out_ir and t is replaced by a pointer to the result's memory address.
* */
static bool expr_flush_ir_and_maybe_return(IRList *out_ir, TokList *toks, IRTok ir_tok, TokListItem *expr_start, Scope *expr_scope, TokListItem *t, ExprRet *out_ret) {
	/* The instruction is the expression's last one if t is the first token
	 * of the expression and is directly followed by a delimiter. */
	const bool is_last_instr = t == expr_start
		&& t->next->tok.kind == TokOp
		&& op_prec[t->next->tok.Op] == PREC_DELIM;

	if (!is_last_instr) {
		/* Intermediate result: give it a fresh address in the expression's
		 * scope, emit the instruction and leave a reference to that
		 * address in the token stream in place of t. */
		size_t dest_addr = expr_scope->mem_addr++;
		set_irtok_dest_addr(&ir_tok, dest_addr);
		irlist_app(out_ir, ir_tok);
		t->tok = (Tok){
			.kind = TokIdent,
			.Ident = {
				.kind = IdentAddr,
				.Addr = dest_addr,
			},
		};
		return false;
	}

	/* Final result: drop t and hand the instruction back unemitted so the
	 * caller can choose its destination address. */
	toklist_del(toks, t, t);
	*out_ret = (ExprRet){
		.kind = ExprRetLastInstr,
		.LastInstr = ir_tok,
	};
	return true;
}
/* Clears the dynamically_allocated flag of an array value; values of any
 * other type are left unchanged. */
static void make_value_statically_allocated(Value *v) {
	if (v->type == TypeArr)
		v->Arr.dynamically_allocated = false;
}
/* The job of this function is to reduce the expression to the most simple form
* writing the least IR instructions possible (without overanalyzing).
* This means that the only IR instructions it will be writing are those for
* calculating intermediate values.
* In the case of ExprRetVal and ExprRetIdent, the value isn't 'returned' in
* the traditional sense, but rather the result is left in the token stream.
* The 'return' value can be of 3 different types:
* - ExprRetVal: The expression yields a constant value as a result.
* Examples: '5', '5 + 2 * 3' or '5 + (2 + 1) * 3'
* - ExprRetIdent: The expression yields an identifier as a result.
* Examples: 'a' or '(((a)))'
* - ExprRetLastInstr: The expression is a more complex sequence of
* instructions. Here the last instruction is returned so the caller can
* manually set the destination address.
* Examples: 'a + 1', '2 + a * b' or '2 + 4 * (b * b) / 5'
*
* Here is also a simplified example of how the operator precedence parsing works:
* ________________________________
* Where t points to (between l_op and r_op in each step)
* |
@@ -132,73 +222,303 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
* l_op r_op
* both l_op and r_op are delimiters (their precedence is PREC_DELIM) => done
*/
static ExprRet expr(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t) {
TokListItem *start = t;
/* Each expression and subexpression has its own scope. */
Scope sc = make_scope(parent_sc, false);
for (;;) {
/* Prepare to collapse negative factor. */
bool negate = false;
if (t->tok.kind == TokOp && t->tok.Op == OpSub) {
/* Prepare to collapse unary operation. */
bool perform_unary = false;
IRInstr unary_op;
if (t->tok.kind == TokOp) {
if (t->tok.Op == OpSub) {
t = t->next;
negate = true;
perform_unary = true;
unary_op = IRNeg;
} else if (t->tok.Op == OpNot) {
t = t->next;
perform_unary = true;
unary_op = IRNot;
} else if (t->tok.Op == OpAddrOf) {
t = t->next;
perform_unary = true;
unary_op = IRAddrOf;
}
}
/* Ignore newlines if told to do so. */
if (mode.ignore_newln && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
toklist_del(s->toks, t->next, t->next);
/* Collapse negative factor. */
if (negate) {
bool is_last_operation = t->prev == start && t->next->tok.kind == TokOp && op_prec[t->next->tok.Op] == PREC_DELIM;
Tok *v = &t->tok;
t = t->prev;
toklist_del(s->toks, t->next, t->next);
if (v->kind == TokVal) {
/* immediately negate value */
t->tok.kind = TokVal;
t->tok.Val.type.kind = v->Val.type.kind;
switch (v->Val.type.kind) {
case TypeInt: t->tok.Val.Int = -v->Val.Int; break;
case TypeFloat: t->tok.Val.Float = -v->Val.Float; break;
default: ASSERT_UNREACHED();
/* Delete newline if we're definitely expecting an operand. */
if (t->tok.kind == TokOp && t->tok.Op == OpNewLn) {
if (t == start)
start = t->next;
t = t->next;
toklist_del(toks, t->prev, t->prev);
}
} else {
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* add IR instruction to negate the value */
IRParam v_irparam;
TRY(v_irparam = tok_to_irparam(&sc, v));
irtoks_app(s->ir, (IRTok){
/* Collapse parentheses. */
if (t->tok.kind == TokOp && t->tok.Op == OpLParen) {
ExprRet r;
TRY_RET(r = expr(out_ir, toks, funcs, &sc, t->next), (ExprRet){0});
if (r.kind == ExprRetLastInstr) {
size_t res_addr = sc.mem_addr++;
set_irtok_dest_addr(&r.LastInstr, res_addr);
irlist_app(out_ir, r.LastInstr);
t->tok = (Tok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRNeg,
.Unary = {
.addr = res_addr,
.val = v_irparam,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(s->toks, t, t);
return;
} else {
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = TokIdent,
.Ident = {
.kind = IdentAddr,
.Addr = res_addr,
},
};
} else if (r.kind == ExprRetVal || r.kind == ExprRetIdent) {
t->tok = t->next->tok;
toklist_del(toks, t->next, t->next);
} else
ASSERT_UNREACHED();
toklist_del(toks, t->next, t->next);
}
/* Collapse function call. */
else if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && t->next->tok.kind == TokOp && t->next->tok.Op == OpLParen) {
/* get function */
BuiltinFunc func;
bool exists = map_get(funcs, t->tok.Ident.Name, &func);
if (!exists) {
mark_err(&t->tok);
set_err("Unrecognized function: %s()", t->tok.Ident.Name);
return (ExprRet){0};
}
TokListItem *func_ident = t;
t = func_ident->next;
/* we want to try to eliminate function calls at runtime if possible */
bool eval_func_in_place = !func.side_effects;
size_t args_len = 0;
IRParam *args = NULL;
if (t->next->tok.kind == TokOp && t->next->tok.Op == OpRParen) {
/* no args */
toklist_del(toks, t->next, t->next); /* delete right parenthesis */
} else {
/* go through the arguments, evaluate them and put them into the args array */
size_t args_cap = 16;
args = xmalloc(sizeof(IRParam) * args_cap);
for (;;) {
if (args_len+1 > args_cap)
args = xrealloc(args, (args_cap *= 2));
IRParam a;
TRY_RET_ELSE(a = expr_into_irparam(out_ir, toks, funcs, &sc, t->next), (ExprRet){0}, free(args));
args[args_len++] = a;
if (a.kind != IRParamLiteral)
eval_func_in_place = false;
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpComma) {
toklist_del(toks, t->next, t->next); /* delete comma */
continue;
} else if (t->next->tok.Op == OpRParen) {
toklist_del(toks, t->next, t->next); /* delete right parenthesis */
break;
}
}
mark_err(&t->next->tok);
set_err("Expected ',' or ')' after function argument");
free(args);
return (ExprRet){0};
}
}
t = func_ident;
toklist_del(toks, t->next, t->next); /* delete left parenthesis */
if (func.kind == FuncFixedArgs && args_len != func.FixedArgs.n_args) {
mark_err(&func_ident->tok);
const char *plural = func.FixedArgs.n_args == 1 ? "" : "s";
set_err("Function %s() takes %zu argument%s but got %zu", func.name, func.FixedArgs.n_args, plural, args_len);
if (args)
free(args);
return (ExprRet){0};
} else if (func.kind == FuncVarArgs && args_len < func.VarArgs.min_args) {
mark_err(&func_ident->tok);
const char *plural = func.VarArgs.min_args == 1 ? "" : "s";
set_err("Function %s() requires at least %zu argument%s but only got %zu", func.name, func.VarArgs.min_args, plural, args_len);
if (args)
free(args);
return (ExprRet){0};
}
if (eval_func_in_place) {
/* evaluate the function in place */
if (!func.returns)
/* If the function had no side effects and returned nothing,
* that function would do absolutely nothing, which would
* make no sense. */
ASSERT_UNREACHED();
Value *arg_vals = args_len ? xmalloc(sizeof(Value) * args_len) : NULL;
for (size_t i = 0; i < args_len; i++)
arg_vals[i] = args[i].Literal;
mark_err(&func_ident->tok);
func_ident->tok = (Tok) {
.kind = TokVal,
.Val = func.kind == FuncVarArgs ?
func.VarArgs.WithRet.func(args_len - func.VarArgs.min_args, arg_vals)
: func.FixedArgs.WithRet.func(arg_vals),
};
/* since we have a literal return value, we want it to be fully treated like one by the memory manager */
make_value_statically_allocated(&func_ident->tok.Val);
/* immediately free any heap-allocated literals that are no longer needed */
for (size_t i = 0; i < args_len; i++)
free_value(&arg_vals[i], true);
/* free buffers */
if (arg_vals)
free(arg_vals);
if (args)
free(args);
} else {
/* function call IR instruction */
IRTok ir_tok = {
.ln = func_ident->tok.ln,
.col = func_ident->tok.col,
.instr = IRCallInternal,
.CallI = {
.ret_addr = 0,
.fid = func.fid,
.n_args = args_len,
.args = args,
},
};
/* return if we've just evaluated the last instruction */
ExprRet ret;
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, func_ident, &ret))
return ret;
}
}
/* Collapse array. */
else if (t->tok.kind == TokOp && t->tok.Op == OpLBrack) {
TokListItem *lbrack = t;
bool eval_immediately = true;
size_t elems_len = 0;
size_t elems_cap = 0;
IRParam *elems = NULL;
if (t->next->tok.kind == TokOp && t->next->tok.Op == OpRBrack) {
/* empty array */
toklist_del(toks, t->next, t->next); /* delete right bracket */
} else {
elems_cap = 16;
elems = xmalloc(sizeof(IRParam) * elems_cap);
for (;;) {
if (elems_len+1 > elems_cap)
elems = xrealloc(elems, (elems_cap *= 2));
IRParam e;
TRY_RET_ELSE(e = expr_into_irparam(out_ir, toks, funcs, &sc, t->next), (ExprRet){0}, free(elems));
if (e.kind != IRParamLiteral)
eval_immediately = false;
elems[elems_len++] = e;
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpComma) {
toklist_del(toks, t->next, t->next); /* delete comma */
continue;
} else if (t->next->tok.Op == OpRBrack) {
toklist_del(toks, t->next, t->next); /* delete right bracket */
break;
}
}
mark_err(&t->next->tok);
set_err("Expected ',' or ']' after array element");
free(elems);
return (ExprRet){0};
}
}
if (eval_immediately) {
/* turn array into value */
Value arr = {
.type = TypeArr,
.Arr = {
.type = TypeVoid,
.is_string = false,
.dynamically_allocated = false,
.vals = NULL,
.len = elems_len,
.cap = elems_len ? elems_cap : 0,
},
};
if (elems_len) {
Type arr_ty = elems[0].Literal.type;
void *arr_vals = xmalloc(type_size[arr_ty] * elems_cap);
for (size_t i = 0; i < elems_len; i++) {
Value *v = &elems[i].Literal;
if (v->type != arr_ty) {
set_err("Type of array item %zu (%s) differs from array type (%s)", i, type_str[v->type], type_str[arr_ty]);
free(arr_vals);
free(elems);
return (ExprRet){0};
}
memcpy((uint8_t*)arr_vals + type_size[arr_ty] * i, &v->Void, type_size[arr_ty]);
}
arr.Arr.type = arr_ty;
arr.Arr.vals = arr_vals;
}
/* set lbracket to collapsed array value */
lbrack->tok.kind = TokVal;
lbrack->tok.Val = arr;
/* free the now no longer needed element IRParam values */
free(elems);
} else {
/* array initialization IR instruction */
IRTok ir_tok = {
.ln = lbrack->tok.ln,
.col = lbrack->tok.col,
.instr = IRArrMake,
.ArrMake = {
.arr_addr = 0,
.len = elems_len,
.cap = elems_cap,
.vals = elems,
},
};
/* return if we've just evaluated the last instruction */
ExprRet ret;
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, lbrack, &ret))
return ret;
}
}
/* Collapse unary operation. */
if (perform_unary) {
Tok *v = &t->tok; /* what we want to perform the operation on */
t = t->prev; /* go back to the '-' sign */
toklist_del(toks, t->next, t->next); /* again, just removing the reference */
if (v->kind == TokVal) {
/* immediately perform operation */
t->tok.kind = TokVal;
mark_err(&t->tok);
TRY_RET(t->tok.Val = eval_unary(unary_op, &v->Val), (ExprRet){0});
} else {
/* unary IR instruction */
IRParam v_irparam;
TRY_RET(v_irparam = tok_to_irparam(&sc, v), (ExprRet){0});
IRTok ir_tok = {
.ln = t->tok.ln,
.col = t->tok.col,
.instr = unary_op,
.Unary = {
.addr = 0,
.val = v_irparam,
},
};
/* return if we've just evaluated the last instruction */
ExprRet ret;
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, t, &ret))
return ret;
}
}
@@ -213,7 +533,7 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
if (l_op->kind != TokOp) {
mark_err(l_op);
set_err("Expected operator");
return;
return (ExprRet){0};
}
l_op_prec = op_prec[l_op->Op];
}
@@ -222,61 +542,50 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
if (r_op->kind != TokOp) {
mark_err(r_op);
set_err("Expected operator");
return;
return (ExprRet){0};
}
r_op_prec = op_prec[r_op->Op];
/* If l_op and r_op are both delimiters, the expression is fully evaluated.
* NOTE: Sometimes, we don't reach this point because the function already
* exits directly after the last operation. */
/* If l_op and r_op are both delimiters, we don't have to evaluate
* anything. */
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
if (t->tok.kind != TokVal && t->tok.kind != TokIdent) {
if (t->tok.kind == TokIdent) {
return (ExprRet){ .kind = ExprRetIdent };
} else if (t->tok.kind == TokVal) {
return (ExprRet){ .kind = ExprRetVal };
} else {
mark_err(&t->tok);
set_err("Expected literal or identifier");
return;
return (ExprRet){0};
}
if (mode.kind == ExprModeStorageAddr) {
IRParam res;
TRY(res = tok_to_irparam(&sc, &t->tok));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = mode.StorageAddr,
.val = res,
},
});
toklist_del(s->toks, t, t);
}
return;
}
bool is_last_operation = t->prev && t->prev->prev == start && r_op_prec == PREC_DELIM;
/* This is the actual operator precedence parser as described above. */
/* This is the operator precedence parser described above. */
if (r_op_prec > l_op_prec)
t = t->next->next;
else {
/* some basic checks */
Tok *rhs = &t->tok;
if (rhs->kind != TokVal && rhs->kind != TokIdent) {
mark_err(rhs);
set_err("Expected literal or identifier");
return;
return (ExprRet){0};
}
t = t->prev->prev;
Tok *lhs = &t->tok;
if (lhs->kind != TokVal && lhs->kind != TokIdent) {
mark_err(lhs);
set_err("Expected literal or identifier");
return;
return (ExprRet){0};
}
/* delete the tokens that fall away from collapsing the expression
* (NOTE: only their references are deleted here, that's important
* because we're still using their values later on) */
toklist_del(s->toks, t->next, t->next->next);
toklist_del(toks, t->next, t->next->next);
bool swap_operands = false;
IRInstr instr;
switch (l_op->Op) {
@@ -284,91 +593,151 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
case OpSub: instr = IRSub; break;
case OpMul: instr = IRMul; break;
case OpDiv: instr = IRDiv; break;
case OpEq: instr = IREq; break;
case OpNeq: instr = IRNeq; break;
case OpLt: instr = IRLt; break;
case OpLe: instr = IRLe; break;
case OpGt: instr = IRLt; swap_operands = true; break;
case OpGe: instr = IRLe; swap_operands = true; break;
case OpAnd: instr = IRAnd; break;
case OpOr: instr = IROr; break;
default:
mark_err(l_op);
set_err("Unknown operation: '%s'", op_str[l_op->Op]);
return;
return (ExprRet){0};
}
if (lhs->kind == TokVal && rhs->kind == TokVal) {
/* evaluate the constant expression immediately */
Value *lhs_val = swap_operands ? &rhs->Val : &lhs->Val;
Value *rhs_val = swap_operands ? &lhs->Val : &rhs->Val;
lhs->kind = TokVal;
TRY(lhs->Val = eval_arith(instr, &lhs->Val, &rhs->Val));
mark_err(l_op);
TRY_RET(lhs->Val = eval_binary(instr, lhs_val, rhs_val), (ExprRet){0});
} else {
IRParam lhs_irparam, rhs_irparam;
TRY(lhs_irparam = tok_to_irparam(&sc, lhs));
TRY(rhs_irparam = tok_to_irparam(&sc, rhs));
TRY_RET(lhs_irparam = tok_to_irparam(&sc, lhs), (ExprRet){0});
TRY_RET(rhs_irparam = tok_to_irparam(&sc, rhs), (ExprRet){0});
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* emit IR code to evaluate the non-constant expression */
irtoks_app(s->ir, (IRTok){
/* binary IR instruction */
IRTok ir_tok = {
.ln = l_op->ln,
.col = l_op->col,
.instr = instr,
.Arith = {
.addr = res_addr,
.lhs = lhs_irparam,
.rhs = rhs_irparam,
.Binary = {
.addr = 0,
.lhs = swap_operands ? rhs_irparam : lhs_irparam,
.rhs = swap_operands ? lhs_irparam : rhs_irparam,
},
};
/* return if we've just evaluated the last instruction */
ExprRet ret;
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, t, &ret))
return ret;
}
}
}
}
/* Evaluates the expression starting at t and makes sure its result ends up
 * at memory address addr.
 * - If expr() yields a value or an identifier, an IRSet instruction copying
 *   it to addr is appended and t is removed from the token list.
 * - If expr() yields its last instruction, that instruction's destination is
 *   set to addr, it is appended, and t is turned into an address identifier. */
static void expr_into_addr(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, size_t addr) {
	ExprRet r;
	TRY(r = expr(out_ir, toks, funcs, parent_sc, t));

	bool result_in_tok = r.kind == ExprRetVal || r.kind == ExprRetIdent;
	if (result_in_tok) {
		/* the result was left in the token stream: copy it to addr */
		IRParam res;
		TRY(res = tok_to_irparam(parent_sc, &t->tok));
		IRTok set_tok = {
			.ln = t->tok.ln,
			.col = t->tok.col,
			.instr = IRSet,
			.Unary = {
				.addr = addr,
				.val = res,
			},
		};
		irlist_app(out_ir, set_tok);
		toklist_del(toks, t, t);
		return;
	}

	if (r.kind == ExprRetLastInstr) {
		/* let the final instruction write directly to addr */
		set_irtok_dest_addr(&r.LastInstr, addr);
		irlist_app(out_ir, r.LastInstr);
		t->tok = (Tok){
			.ln = t->tok.ln,
			.col = t->tok.col,
			.kind = TokIdent,
			.Ident = {
				.kind = IdentAddr,
				.Addr = addr,
			},
		};
	} else
		ASSERT_UNREACHED();
}
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(s->toks, t, t);
break;
} else {
/* leave new memory address as result */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
static IRParam expr_into_irparam(IRList *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t) {
ExprRet r;
TRY_RET(r = expr(out_ir, toks, funcs, parent_sc, t), (IRParam){0});
if (r.kind == ExprRetLastInstr) {
Scope sc = make_scope(parent_sc, false);
size_t addr = sc.mem_addr++;
set_irtok_dest_addr(&r.LastInstr, addr);
irlist_app(out_ir, r.LastInstr);
return (IRParam){
.kind = IRParamAddr,
.Addr = addr,
};
}
}
}
}
} else if (r.kind == ExprRetVal || r.kind == ExprRetIdent) {
IRParam ret;
TRY_RET(ret = tok_to_irparam(parent_sc, &t->tok), (IRParam){0});
toklist_del(toks, t, t);
return ret;
} else
ASSERT_UNREACHED();
}
static void stmt(State *s, Scope *sc, TokListItem *t) {
/* Deletes the run of consecutive newline tokens that begins at from; does
 * nothing if from is not a newline token.
 * Since from itself is part of the deleted range, it WILL be invalidated:
 * only call this on a TokListItem after any ones that are still in use
 * (e.g. skip_newlns(t->next)). */
static void skip_newlns(TokList *toks, TokListItem *from) {
	TokListItem *end = from;
	/* advance to the first token that is not a newline */
	for (; end->tok.kind == TokOp && end->tok.Op == OpNewLn; end = end->next)
		;
	if (end == from)
		return;
	toklist_del(toks, from, end->prev);
}
static void stmt(IRList *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t) {
TokListItem *start = t;
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName) {
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && (t->next->tok.kind == TokDeclare || t->next->tok.kind == TokAssign)) {
char *name = t->tok.Ident.Name;
t = t->next;
if (t->tok.kind == TokDeclare) {
t = t->next;
size_t addr = sc->mem_addr++;
TRY(expr_into_addr(out_ir, toks, funcs, sc, t->next, addr));
bool replaced = map_insert(&sc->ident_addrs, name, &addr);
if (replaced) {
mark_err(&start->tok);
set_err("'%s' already declared in this scope", name);
return;
}
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
} else if (t->tok.kind == TokAssign) {
t = t->next;
size_t addr;
TRY(addr = get_ident_addr(sc, name, &start->tok));
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
}
TRY(expr_into_addr(out_ir, toks, funcs, sc, t->next, addr));
} else
ASSERT_UNREACHED();
} else if (t->tok.kind == TokOp && t->tok.Op == OpLCurl) {
Scope inner_sc = make_scope(sc, true);
for (;;) {
skip_newlns(toks, t->next);
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpEOF) {
term_scope(&inner_sc);
mark_err(&start->tok);
set_err("Unclosed '{'");
term_scope(&inner_sc);
return;
}
if (t->next->tok.Op == OpRCurl)
break;
}
TRY_ELSE(stmt(s, &inner_sc, t->next), term_scope(&inner_sc));
TRY_ELSE(stmt(out_ir, toks, funcs, &inner_sc, t->next), term_scope(&inner_sc));
}
term_scope(&inner_sc);
t = t->next;
@@ -381,8 +750,8 @@ static void stmt(State *s, Scope *sc, TokListItem *t) {
* 4: jmp to 1 if condition xyz is met
* */
size_t jmp_instr_iaddr = s->ir->len;
irtoks_app(s->ir, (IRTok){
/* add initial jmp instruction */
irlist_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJmp,
@@ -390,56 +759,129 @@ static void stmt(State *s, Scope *sc, TokListItem *t) {
.iaddr = 0, /* unknown for now */
},
});
IRItem *jmp_instr = out_ir->end;
size_t body_iaddr = out_ir->len;
t = t->next;
/* parse condition */
IRList cond_ir;
irlist_init_short(&cond_ir);
IRParam cond;
TRY_ELSE(cond = expr_into_irparam(&cond_ir, toks, funcs, sc, t->next), irlist_term(&cond_ir));
/* find beginning of while loop body */
TokListItem *lcurl;
for (TokListItem *i = t;; i++) {
if (i == NULL) {
mark_err(&start->tok);
set_err("Expected '{' after 'while' loop condition");
return;
}
if (i->tok.kind == TokOp && i->tok.Op == OpLCurl) {
lcurl = i;
break;
}
}
/* write loop body to IR stream */
TRY(stmt(s, sc, lcurl));
/* parse loop body */
skip_newlns(toks, t->next);
TRY_ELSE(stmt(out_ir, toks, funcs, sc, t->next), irlist_term(&cond_ir));
/* finally we know where the jmp from the beginning has to jump to */
s->ir->toks[jmp_instr_iaddr].Jmp.iaddr = s->ir->len;
jmp_instr->tok.Jmp.iaddr = out_ir->len;
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = false }));
IRParam condition;
TRY(condition = tok_to_irparam(sc, &t->tok));
/* append condition IR to program IR, then terminate condition IR stream */
irlist_eat_irlist(out_ir, &cond_ir);
irtoks_app(s->ir, (IRTok){
/* add conditional jump */
irlist_app(out_ir, (IRTok){
.ln = t->next->tok.ln,
.col = t->next->tok.col,
.instr = IRJnz,
.CJmp = {
.iaddr = body_iaddr,
.condition = cond,
},
});
t = t->next;
} else if (t->tok.kind == TokIf) {
/* How if is generally implemented in IR:
* 0: some stuff evaluating condition xyz
* 1: jmp to 5 if condition xyz is met
* 2: some_code in else
* 4: jmp to 6
* 5: some_code in if
* */
/* parse condition */
IRParam cond;
TRY(cond = expr_into_irparam(out_ir, toks, funcs, sc, t->next));
/* add conditional jmp instruction */
irlist_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJnz,
.CJmp = {
.iaddr = jmp_instr_iaddr + 1,
.condition = condition,
.iaddr = 0, /* unknown for now */
.condition = cond,
},
});
}
toklist_del(s->toks, start, t);
IRItem *if_cjmp_instr = out_ir->end;
/* parse if body */
skip_newlns(toks, t->next);
IRList if_body;
irlist_init_short(&if_body);
TRY_ELSE(stmt(&if_body, toks, funcs, sc, t->next), irlist_term(&if_body));
skip_newlns(toks, t->next);
if (t->next->tok.kind == TokElse) {
toklist_del(toks, t->next, t->next);
/* parse and add else body */
skip_newlns(toks, t->next);
TRY_ELSE(stmt(out_ir, toks, funcs, sc, t->next), irlist_term(&if_body));
}
IRToks parse(TokList *toks) {
IRToks ir;
irtoks_init(&ir);
State s = { .toks = toks, .ir = &ir };
/* add jmp instruction to jump back to common code */
irlist_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJmp,
.Jmp = {
.iaddr = 0, /* unknown for now */
},
});
IRItem *else_jmp_instr = out_ir->end;
/* set if condition jmp target */
if_cjmp_instr->tok.CJmp.iaddr = out_ir->len;
/* add if body */
irlist_eat_irlist(out_ir, &if_body);
/* set else jmp target */
else_jmp_instr->tok.CJmp.iaddr = out_ir->len;
} else {
/* assume expression */
TRY(expr_into_irparam(out_ir, toks, funcs, sc, t));
return;
}
toklist_del(toks, start, t);
}
IRList parse(TokList *toks, BuiltinFunc *builtin_funcs, size_t n_builtin_funcs) {
Map funcs;
map_init(&funcs, sizeof(BuiltinFunc));
for (size_t i = 0; i < n_builtin_funcs; i++) {
builtin_funcs[i].fid = i;
bool replaced = map_insert(&funcs, builtin_funcs[i].name, &builtin_funcs[i]);
if (replaced) {
err_ln = 0; err_col = 0;
set_err("Builtin function %s() declared more than once", builtin_funcs[i].name);
map_term(&funcs);
return (IRList){0};
}
}
IRList ir;
irlist_init_long(&ir);
Scope global_scope = make_scope(NULL, true);
for (;;) {
skip_newlns(toks, toks->begin);
if (toks->begin->tok.kind == TokOp && toks->begin->tok.Op == OpEOF)
break;
TRY_RET_ELSE(stmt(&s, &global_scope, toks->begin), ir, term_scope(&global_scope));
TRY_RET_ELSE(stmt(&ir, toks, &funcs, &global_scope, toks->begin), ir,
{ term_scope(&global_scope); map_term(&funcs); });
}
term_scope(&global_scope);
map_term(&funcs);
return ir;
}

View File

@@ -5,6 +5,6 @@
#include "tok.h"
#include "util.h"
IRToks parse(TokList *toks);
IRList parse(TokList *toks, BuiltinFunc *builtin_funcs, size_t n_builtin_funcs);
#endif /* PARSE_H */

112
runtime.c
View File

@@ -2,13 +2,13 @@
#include "util.h"
Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
Value eval_binary(IRInstr instr, const Value *lhs, const Value *rhs) {
switch (instr) {
case IRAdd:
case IRSub:
case IRMul:
case IRDiv: {
if (lhs->type.kind == TypeInt && rhs->type.kind == TypeInt) {
if (lhs->type == TypeInt && rhs->type == TypeInt) {
ssize_t res;
switch (instr) {
case IRAdd: res = lhs->Int + rhs->Int; break;
@@ -18,11 +18,11 @@ Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
default: ASSERT_UNREACHED();
}
return (Value){
.type.kind = TypeInt,
.type = TypeInt,
.Int = res,
};
} else if (lhs->type.kind == TypeFloat && rhs->type.kind == TypeFloat) {
float res;
} else if (lhs->type == TypeFloat && rhs->type == TypeFloat) {
double res;
switch (instr) {
case IRAdd: res = lhs->Float + rhs->Float; break;
case IRSub: res = lhs->Float - rhs->Float; break;
@@ -31,26 +31,122 @@ Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
default: ASSERT_UNREACHED();
}
return (Value){
.type.kind = TypeFloat,
.type = TypeFloat,
.Float = res,
};
} else {
set_err("Unsupported types for operation '%s'", irinstr_str[instr]);
set_err("Unsupported types for operation '%s': %s and %s", irinstr_str[instr], type_str[lhs->type], type_str[rhs->type]);
return (Value){0};
}
}
case IREq:
case IRNeq:
case IRLt:
case IRLe: {
bool res;
if (lhs->type == TypeInt && rhs->type == TypeInt) {
switch (instr) {
case IREq: res = lhs->Int == rhs->Int; break;
case IRNeq: res = lhs->Int != rhs->Int; break;
case IRLt: res = lhs->Int < rhs->Int; break;
case IRLe: res = lhs->Int <= rhs->Int; break;
default: ASSERT_UNREACHED();
};
} else if (lhs->type == TypeFloat && rhs->type == TypeFloat) {
switch (instr) {
case IREq: res = lhs->Float == rhs->Float; break;
case IRNeq: res = lhs->Float != rhs->Float; break;
case IRLt: res = lhs->Float < rhs->Float; break;
case IRLe: res = lhs->Float <= rhs->Float; break;
default: ASSERT_UNREACHED();
};
} else if (lhs->type == TypeArr && lhs->Arr.type == TypeChar && lhs->Arr.is_string &&
rhs->type == TypeArr && rhs->Arr.type == TypeChar && rhs->Arr.is_string) {
switch (instr) {
case IREq:
res = lhs->Arr.len == rhs->Arr.len ? strncmp(lhs->Arr.vals, rhs->Arr.vals, lhs->Arr.len) == 0 : false;
break;
case IRNeq:
res = lhs->Arr.len == rhs->Arr.len ? strncmp(lhs->Arr.vals, rhs->Arr.vals, lhs->Arr.len) != 0 : true;
break;
default:
set_err("String operation '%s' not supported", irinstr_str[instr]);
break;
};
} else {
set_err("Unsupported types for operation '%s': %s and %s", irinstr_str[instr], type_str[lhs->type], type_str[rhs->type]);
return (Value){0};
}
return (Value){
.type = TypeBool,
.Bool = res,
};
}
case IRAnd:
return (Value){
.type = TypeBool,
.Bool = is_nonzero(lhs) && is_nonzero(rhs),
};
break;
case IROr:
return (Value){
.type = TypeBool,
.Bool = is_nonzero(lhs) || is_nonzero(rhs),
};
break;
default:
ASSERT_UNREACHED();
}
return (Value){0};
}
/* Reports that instr cannot be applied to a value of v's type and returns
 * the zero Value used as the error result. */
static Value unary_type_err(IRInstr instr, const Value *v) {
	set_err("Unsupported type for operation '%s': %s", irinstr_str[instr], type_str[v->type]);
	return (Value){0};
}

/* Evaluates the unary instruction instr on the literal v.
 * - IRSet:    returns a copy of v
 * - IRNeg:    negates ints and floats
 * - IRNot:    inverts bools
 * - IRAddrOf: always an error, since v is a literal
 * On error, set_err() is called and a zeroed Value is returned. */
Value eval_unary(IRInstr instr, const Value *v) {
	switch (instr) {
		case IRSet:
			return *v;
		case IRNeg:
			switch (v->type) {
				case TypeInt:
					return (Value){ .type = TypeInt, .Int = -v->Int };
				case TypeFloat:
					return (Value){ .type = TypeFloat, .Float = -v->Float };
				default:
					return unary_type_err(instr, v);
			}
		case IRNot:
			if (v->type != TypeBool)
				return unary_type_err(instr, v);
			return (Value){ .type = TypeBool, .Bool = !v->Bool };
		case IRAddrOf:
			set_err("Unable to take the address of a literal");
			return (Value){0};
		default:
			ASSERT_UNREACHED();
	}
}
/* Truthiness of a value: numbers and chars are true when they differ from
 * zero, bools are taken as-is, pointers are true when non-NULL and arrays
 * are true when they contain at least one element. */
bool is_nonzero(const Value *v) {
	switch (v->type) {
		case TypeBool:
			return v->Bool;
		case TypeChar:
			return v->Char != 0;
		case TypeInt:
			return v->Int != 0;
		case TypeFloat:
			return v->Float != 0.0;
		case TypePtr:
			return v->Ptr.val ? true : false;
		case TypeArr:
			return v->Arr.len > 0;
		default:
			ASSERT_UNREACHED();
	}
}
Value zero_val(Type ty) {
Value ret;
ret.type = ty;
switch (ty.kind) {
switch (ty) {
case TypeInt: ret.Int = 0; break;
case TypeFloat: ret.Float = 0.0; break;
case TypeBool: ret.Bool = false; break;
default: ASSERT_UNREACHED();
}
return ret;

View File

@@ -3,7 +3,9 @@
#include "ir.h"
Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs);
Value eval_binary(IRInstr instr, const Value *lhs, const Value *rhs);
Value eval_unary(IRInstr instr, const Value *v);
bool is_nonzero(const Value *v);
Value zero_val(Type ty);
#endif /* RUNTIME_H */

151
tok.c
View File

@@ -5,16 +5,132 @@
#include "util.h"
/* Size in bytes of the storage each Type occupies inside a Value's union,
 * indexed by Type. TypeVoid occupies no storage. */
size_t type_size[TypeEnumSize] = {
[TypeVoid] = 0,
[TypeFloat] = sizeof(((Value*)NULL)->Float),
[TypeInt] = sizeof(((Value*)NULL)->Int),
[TypeBool] = sizeof(((Value*)NULL)->Bool),
[TypeChar] = sizeof(((Value*)NULL)->Char),
[TypePtr] = sizeof(((Value*)NULL)->Ptr),
[TypeArr] = sizeof(((Value*)NULL)->Arr),
};
/* Human-readable name of each Type, indexed by Type. */
const char *type_str[TypeEnumSize] = {
[TypeVoid] = "void",
[TypeFloat] = "float",
[TypeInt] = "int",
[TypeBool] = "bool",
[TypeChar] = "char",
[TypePtr] = "ptr",
[TypeArr] = "arr",
};
/* Releases the element buffer of an array value and resets the array to an
 * empty state (vals = NULL, len = cap = 0). Values of other types and arrays
 * without a buffer are left untouched.
 * If purge is set, even statically allocated literals will be freed;
 * otherwise only arrays flagged as dynamically allocated are. */
void free_value(Value *v, bool purge) {
	if (v->type != TypeArr)
		return;
	if (!v->Arr.vals)
		return;
	if (!purge && !v->Arr.dynamically_allocated)
		return;
	free(v->Arr.vals);
	v->Arr.vals = NULL;
	v->Arr.len = 0;
	v->Arr.cap = 0;
}
/* Writes a textual representation of v to stdout.
 *
 * raw only affects chars and strings: when set, they are printed verbatim;
 * otherwise they are wrapped in quotes and characters known to
 * unescape_char() are printed as escape sequences.
 * Pointees and the elements of non-string arrays are always printed with
 * raw = false. */
void print_value(const Value *v, bool raw) {
	switch (v->type) {
		case TypeVoid:
			printf("(void)");
			break;
		case TypeFloat:
			printf("%f", v->Float);
			break;
		case TypeInt:
			printf("%zd", v->Int);
			break;
		case TypeBool:
			printf("%s", v->Bool ? "true" : "false");
			break;
		case TypeChar:
			if (raw)
				printf("%c", v->Char);
			else {
				const char *esc = unescape_char(v->Char);
				if (esc) printf("'%s'", esc);
				else printf("'%c'", v->Char);
			}
			break;
		case TypePtr: {
			if (v->Ptr.val) {
				/* copy the pointee into a temporary Value so it can be
				 * printed recursively */
				printf("ptr<%s>(", type_str[v->Ptr.type]);
				Value deref = { .type = v->Ptr.type };
				memcpy(&deref.Void, v->Ptr.val, type_size[v->Ptr.type]);
				print_value(&deref, false);
				printf(")");
			} else
				printf("ptr<%s>(nil)", type_str[v->Ptr.type]);
			break;
		}
		case TypeArr:
			if (v->Arr.is_string) {
				/* strings are always char arrays */
				if (v->Arr.type != TypeChar)
					ASSERT_UNREACHED();
				char *str = v->Arr.vals;
				if (!raw)
					printf("\"");
				for (size_t i = 0; i < v->Arr.len; i++) {
					char c = str[i];
					if (raw)
						printf("%c", c);
					else {
						const char *esc = unescape_char(c);
						if (esc) printf("%s", esc);
						else printf("%c", c);
					}
				}
				if (!raw)
					printf("\"");
			} else {
				/* Bounded by len, so an empty array prints "[]" and its
				 * (possibly NULL) vals buffer is never read. */
				size_t ty_sz = type_size[v->Arr.type];
				printf("[");
				for (size_t i = 0; i < v->Arr.len; i++) {
					if (i > 0)
						printf(", ");
					Value ty_val = { .type = v->Arr.type };
					memcpy(&ty_val.Void, (uint8_t*)v->Arr.vals + ty_sz * i, ty_sz);
					print_value(&ty_val, false);
				}
				printf("]");
			}
			break;
		default:
			ASSERT_UNREACHED();
	}
}
int8_t op_prec[OperatorEnumSize] = {
[OpEOF] = PREC_DELIM,
[OpNewLn] = PREC_DELIM,
[OpLCurl] = PREC_DELIM,
[OpRParen] = PREC_DELIM,
[OpRBrack] = PREC_DELIM,
[OpComma] = PREC_DELIM,
[OpAdd] = 0,
[OpSub] = 0,
[OpMul] = 1,
[OpDiv] = 1,
[OpAnd] = 0,
[OpOr] = 0,
[OpEq] = 1,
[OpNeq] = 1,
[OpLt] = 1,
[OpGt] = 1,
[OpLe] = 1,
[OpGe] = 1,
[OpAdd] = 2,
[OpSub] = 2,
[OpMul] = 3,
[OpDiv] = 3,
};
const char *op_str[OperatorEnumSize] = {
@@ -22,19 +138,31 @@ const char *op_str[OperatorEnumSize] = {
[OpRCurl] = "}",
[OpLParen] = "(",
[OpRParen] = ")",
[OpLBrack] = "[",
[OpRBrack] = "]",
[OpComma] = ",",
[OpAdd] = "+",
[OpSub] = "-",
[OpMul] = "*",
[OpDiv] = "/",
[OpNot] = "!",
[OpNewLn] = "\\n",
[OpEOF] = "EOF",
[OpEq] = "==",
[OpNeq] = "!=",
[OpLt] = "<",
[OpGt] = ">",
[OpLe] = "<=",
[OpGe] = ">=",
[OpAnd] = "&&",
[OpOr] = "||",
};
/* Source spelling of the assignment, declaration and keyword token kinds,
 * indexed by token kind. Kinds without an entry here are left NULL. */
const char *tok_str[TokKindEnumSize] = {
[TokAssign] = "=",
[TokDeclare] = ":=",
[TokIf] = "if",
[TokElse] = "else",
[TokWhile] = "while",
};
@@ -92,18 +220,9 @@ void print_toks(TokList *l) {
printf(": " C_ICYAN "%s" C_RESET, op_str[i->tok.Op]);
break;
case TokVal:
printf(C_IYELLOW "Val" C_RESET);
switch (i->tok.Val.type.kind) {
case TypeFloat:
printf(": " C_ICYAN "%f" C_RESET, i->tok.Val.Float);
break;
case TypeInt:
printf(": " C_ICYAN "%zd" C_RESET, i->tok.Val.Int);
break;
default:
printf(" " C_ICYAN "(unknown type)" C_RESET);
break;
}
printf(C_IYELLOW "Val" C_RESET ": " C_ICYAN);
print_value(&i->tok.Val, false);
printf(C_RESET);
break;
case TokIdent:
printf(C_IYELLOW "Ident" C_RESET);

47
tok.h
View File

@@ -2,41 +2,73 @@
#define __TOK_H__
#include <stdint.h>
#include <unistd.h>
#include "util.h"
typedef struct Type {
enum {
enum Type {
TypeVoid = 0,
TypeFloat,
TypeInt,
} kind;
TypeBool,
TypeChar,
TypePtr,
TypeArr,
TypeEnumSize,
};
typedef enum Type Type;
/*union {
};*/
} Type;
extern size_t type_size[TypeEnumSize];
extern const char *type_str[TypeEnumSize];
/* A tagged value: type selects which member of the anonymous union is
 * meaningful. */
typedef struct Value {
Type type;
union {
/* first byte of the payload; its address is used to copy raw payload
 * bytes in and out of a Value with memcpy */
pseudo_void Void;
double Float;
ssize_t Int;
bool Bool;
char Char;
/* pointer to a bare payload of the given type; val may be NULL (nil) */
struct {
Type type;
void *val;
} Ptr;
/* array of len elements of the given element type, stored back to back
 * in vals; cap is the number of element slots allocated */
struct {
Type type;
/* set for strings; a string's element type must be TypeChar */
bool is_string : 1;
/* free_value() only releases vals when this is set (or when purging) */
bool dynamically_allocated : 1;
void *vals;
size_t len, cap;
} Arr;
};
} Value;
void free_value(Value *v, bool purge);
void print_value(const Value *v, bool raw);
enum Operator {
OpLCurl = '{',
OpRCurl = '}',
OpLParen = '(',
OpRParen = ')',
OpLBrack = '[',
OpRBrack = ']',
OpComma = ',',
OpAdd = '+',
OpSub = '-',
OpMul = '*',
OpDiv = '/',
OpNot = '!',
OpAddrOf = '&',
OpBeginNonchars = 256,
OpEq,
OpNeq,
OpLt,
OpGt,
OpLe,
OpGe,
OpAnd,
OpOr,
OpNewLn,
OpEOF,
OperatorEnumSize,
@@ -69,6 +101,7 @@ typedef struct Tok {
TokAssign,
TokDeclare,
TokIf,
TokElse,
TokWhile,
TokKindEnumSize,
} kind;

58
util.c
View File

@@ -2,6 +2,23 @@
#include <stdarg.h>
/* Use the same Windows detection for the include as for the code below:
 * some compilers only predefine _WIN32, not WIN32. */
#if defined(_WIN32) || defined(WIN32)
#include <windows.h> /* Sleep */
#else
#include <time.h> /* nanosleep */
#endif

/* Sleeps for roughly secs seconds (fractions allowed).
 * Non-positive and NaN durations return immediately instead of being passed
 * on to the platform sleep function. */
void sleep_secs(double secs) {
	if (!(secs > 0.0))
		return;
#if defined(_WIN32) || defined(WIN32)
	Sleep((DWORD)(secs * 1000.0));
#else
	struct timespec ts;
	ts.tv_sec = (time_t)secs;
	ts.tv_nsec = (long)((secs - (double)ts.tv_sec) * 1000000000.0);
	/* nanosleep rejects tv_nsec >= 1e9; guard against the fraction rounding
	 * up to a full second */
	if (ts.tv_nsec > 999999999L)
		ts.tv_nsec = 999999999L;
	nanosleep(&ts, NULL);
#endif
}
char errbuf[ERRSZ];
bool err;
size_t err_ln, err_col;
@@ -49,8 +66,28 @@ void set_err(const char *fmt, ...) {
va_end(va);
}
#define XMALLOC_ERR "Failed to allocate %zu bytes: Out of memory\n"

/*
 * malloc() wrapper that never returns NULL.
 *
 * On allocation failure it prints XMALLOC_ERR to stderr and aborts.
 * A request for 0 bytes is served as a 1-byte allocation: malloc(0) is allowed
 * to return NULL, which must not be mistaken for running out of memory.
 * The result is released with free().
 */
void *xmalloc(size_t size) {
	void *ret = malloc(size ? size : 1);
	if (!ret) {
		fprintf(stderr, XMALLOC_ERR, size);
		abort();
	}
	return ret;
}

/*
 * realloc() wrapper that never returns NULL.
 *
 * On allocation failure it prints XMALLOC_ERR to stderr and aborts.
 * A new size of 0 is served as a 1-byte allocation: realloc(ptr, 0) may free
 * `ptr` and return NULL, which previously triggered a bogus out-of-memory
 * abort. `ptr` may be NULL, in which case this behaves like xmalloc().
 */
void *xrealloc(void *ptr, size_t size) {
	void *ret = realloc(ptr, size ? size : 1);
	if (!ret) {
		fprintf(stderr, XMALLOC_ERR, size);
		abort();
	}
	return ret;
}
Pool *pool_new(size_t init_cap) {
Pool *p = malloc(sizeof(Pool) + init_cap);
Pool *p = xmalloc(sizeof(Pool) + init_cap);
p->len = 0;
p->cap = init_cap;
p->data = p + 1;
@@ -82,7 +119,7 @@ void *pool_alloc(Pool *p, size_t bytes) {
}
char *sndup(const char *s, size_t n) {
char *ret = malloc(n+1);
char *ret = xmalloc(n+1);
if (ret) {
memcpy(ret, s, n);
ret[n] = 0;
@@ -148,6 +185,23 @@ double stod(const char *s, size_t n, ssize_t *endpos) {
return res;
}
/*
 * Map a character to the source-level escape sequence that denotes it
 * (for example a newline becomes the two characters backslash and 'n').
 * Returns a pointer to a static string, or NULL when the character has no
 * escape sequence in the table below.
 */
const char *unescape_char(char c) {
	static const struct {
		char raw;
		const char *escaped;
	} escapes[] = {
		{ '\a',   "\\a"  },
		{ '\b',   "\\b"  },
		{ '\033', "\\e"  },
		{ '\f',   "\\f"  },
		{ '\n',   "\\n"  },
		{ '\r',   "\\r"  },
		{ '\t',   "\\t"  },
		{ '\v',   "\\v"  },
		{ '\\',   "\\\\" },
		{ '\'',   "\\'"  },
		{ '"',    "\\\"" },
	};
	const size_t n_escapes = sizeof escapes / sizeof escapes[0];

	for (size_t k = 0; k < n_escapes; k++) {
		if (escapes[k].raw == c)
			return escapes[k].escaped;
	}
	return NULL;
}
char *mreadfile(FILE *fp) {
if (fseek(fp, 0l, SEEK_END) == -1)
return NULL;

19
util.h
View File

@@ -6,7 +6,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if defined(_WIN32) || defined(WIN32)
#include <windows.h> /* SSIZE_T */
typedef SSIZE_T ssize_t;
#else
#include <unistd.h> /* ssize_t */
#endif
typedef uint8_t pseudo_void;
/* some ANSI color codes */
#define C_RED "\x1b[31m"
@@ -27,6 +35,8 @@
#define C_RESET "\x1b[m"
void sleep_secs(double secs);
#define ERRSZ 4096
extern char errbuf[ERRSZ];
extern bool err;
@@ -37,12 +47,15 @@ extern size_t err_ln, err_col;
#define TRY_RET_ELSE(expr, ret, onerr) {expr; if (err) {onerr; return (ret);}}
void set_err(const char *fmt, ...);
#define ASSERT_UNREACHED() { fprintf(stderr, "Illegal code position reached in %s:%d\n", __FILE__, __LINE__); exit(1); }
#define ASSERT_UNREACHED() { fprintf(stderr, "Illegal code position reached in %s:%d\n", __FILE__, __LINE__); abort(); }
/* Character classification helpers: decimal digit, letter-or-underscore, and
 * either of the two. Every use of the argument is parenthesized so that an
 * arbitrary expression (e.g. `ch | 0`, `cond ? a : b`) is classified as a
 * whole. The argument is evaluated more than once, so it must not have side
 * effects. */
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) == '_')
#define IS_ALNUM(c) (IS_ALPHA(c) || IS_NUM(c))
void *xmalloc(size_t size);
void *xrealloc(void *ptr, size_t size);
/* Useful for efficiently allocating lots of data that can all be freed at once afterwards. */
typedef struct Pool {
struct Pool *next;
@@ -64,6 +77,8 @@ char *psndup(Pool *p, const char *s, size_t n);
intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos /* -1 on success */);
/* convert a non-null-terminated string to a double */
double stod(const char *s, size_t n, ssize_t *endpos /* -1 on success */);
/* return the escape sequence for a given character; return NULL if there is none */
const char *unescape_char(char c);
/* sets errno on failure */
char *mreadfile(FILE *fp);

219
vm.c
View File

@@ -1 +1,220 @@
#include "vm.h"
#include "runtime.h"
#include "util.h"
#define INIT_STACK_CAP 128
/* Index-addressed storage for the VM's values, with one occupancy flag per slot. */
typedef struct Stack {
/* slot storage; `cap` elements are allocated */
Value *mem;
/* holds_value[i] is true once slot i has been written by stack_assign() */
bool *holds_value;
/* cap: number of allocated slots; len is set to 0 by stack_make() and not
 * updated anywhere else in this file */
size_t len, cap;
} Stack;
static Stack stack_make(void);
static void stack_term(Stack *s);
static void stack_fit(Stack *s, size_t idx);
static void stack_assign(Stack *s, size_t idx, const Value *v);
/* Create a stack with INIT_STACK_CAP slots, none of which holds a value yet.
 * Release it with stack_term(). */
static Stack stack_make(void) {
	Stack fresh;

	fresh.len = 0;
	fresh.cap = INIT_STACK_CAP;
	fresh.mem = xmalloc(sizeof(Value) * INIT_STACK_CAP);
	fresh.holds_value = xmalloc(sizeof(bool) * INIT_STACK_CAP);

	/* mark every slot as unoccupied */
	size_t slot = 0;
	while (slot < fresh.cap) {
		fresh.holds_value[slot] = false;
		slot++;
	}
	return fresh;
}
/* Tear down a stack: pass every occupied slot to free_value(), then release
 * the slot array and the occupancy flags. */
static void stack_term(Stack *s) {
	/* values that are still alive may own heap memory */
	for (size_t slot = 0; slot != s->cap; ++slot) {
		if (!s->holds_value[slot])
			continue;
		free_value(s->mem + slot, false);
	}

	/* the stack's own buffers */
	free(s->mem);
	free(s->holds_value);
}
/* Make sure slot `idx` exists, growing both parallel arrays when it does not.
 * Newly added slots are marked as unoccupied.
 * NOTE(review): growing goes through xrealloc(), so addresses previously taken
 * inside s->mem (run() stores such addresses for IRAddrOf) may no longer be
 * valid afterwards -- confirm whether this can happen in practice. */
static void stack_fit(Stack *s, size_t idx) {
	const size_t needed = idx + 1;
	if (needed <= s->cap)
		return;

	const size_t old_cap = s->cap;
	const size_t grown_cap = needed + old_cap * 2;

	s->mem = xrealloc(s->mem, sizeof(Value) * grown_cap);
	s->holds_value = xrealloc(s->holds_value, sizeof(bool) * grown_cap);

	size_t slot = old_cap;
	while (slot < grown_cap)
		s->holds_value[slot++] = false;

	s->cap = grown_cap;
}
/* Resolve an IR parameter to the Value it denotes: a stack parameter yields
 * the addressed stack slot, a literal yields the Value embedded in the
 * parameter itself. Any other parameter kind is a fatal internal error. */
static Value *irparam_to_val(Stack *s, IRParam *v) {
	if (v->kind == IRParamAddr)
		return &s->mem[v->Addr];
	if (v->kind == IRParamLiteral)
		return &v->Literal;
	ASSERT_UNREACHED();
}
/* Store a copy of `*v` in slot `idx`, growing the stack if necessary.
 * A value already living in that slot is passed to free_value() first so that
 * heap memory it owns is not leaked by the overwrite. */
static void stack_assign(Stack *s, size_t idx, const Value *v) {
	stack_fit(s, idx);

	Value *slot = &s->mem[idx];
	bool *occupied = &s->holds_value[idx];

	if (*occupied)
		free_value(slot, false);

	*slot = *v;
	*occupied = true;
}
/*
 * Execute an IR instruction list.
 *
 * ir:            instruction list; its index is refreshed via
 *                irlist_update_index() so that jumps can address instructions
 *                by number.
 * builtin_funcs: table of built-in functions, indexed by the function id
 *                stored in IRCallInternal instructions.
 *
 * Before each instruction the global err_ln/err_col are set to that
 * instruction's source position. On the explicit error paths below the
 * argument buffer and the stack are released before returning; the TRY_ELSE
 * uses pass the same cleanup as their error handler (presumably executed,
 * followed by a return, when `err` is set -- cf. TRY_RET_ELSE in util.h).
 * A jump whose target is not below ir->len ends execution.
 */
void run(IRList *ir, const BuiltinFunc *builtin_funcs) {
	/* so we don't have to call malloc on every function call */
	size_t fn_args_cap = 16;
	Value *fn_args = xmalloc(sizeof(Value) * fn_args_cap);

	/* so we can use index-based addressing */
	irlist_update_index(ir);

	Stack s = stack_make();
	for (IRItem *i = ir->begin; i;) {
		IRTok *instr = &i->tok;
		err_ln = instr->ln;
		err_col = instr->col;
		switch (instr->instr) {
			case IRSet:
			case IRNeg:
			case IRNot: {
				Value res;
				TRY_ELSE(res = eval_unary(instr->instr, irparam_to_val(&s, &instr->Unary.val)),
					{free(fn_args); stack_term(&s);});
				stack_assign(&s, instr->Unary.addr, &res);
				break;
			}
			case IRAddrOf: {
				if (instr->Unary.val.kind != IRParamAddr) {
					set_err("Unable to take the address of a literal");
					free(fn_args);
					stack_term(&s);
					return;
				}
				Value *v = &s.mem[instr->Unary.val.Addr];
				/* the pointer refers to the payload inside the stack slot,
				 * not to the slot's Value wrapper */
				Value res = {
					.type = TypePtr,
					.Ptr = {
						.type = v->type,
						.val = &v->Void,
					},
				};
				stack_assign(&s, instr->Unary.addr, &res);
				break;
			}
			case IRAdd:
			case IRSub:
			case IRDiv:
			case IRMul:
			case IREq:
			case IRNeq:
			case IRLt:
			case IRLe:
			case IRAnd:
			case IROr: {
				Value res;
				TRY_ELSE(res = eval_binary(instr->instr,
							irparam_to_val(&s, &instr->Binary.lhs),
							irparam_to_val(&s, &instr->Binary.rhs)),
					{free(fn_args); stack_term(&s);});
				stack_assign(&s, instr->Binary.addr, &res);
				break;
			}
			case IRJmp:
				if (instr->Jmp.iaddr < ir->len)
					i = ir->index[instr->Jmp.iaddr];
				else
					i = NULL;
				/* skip the i = i->next step at the bottom of the loop */
				continue;
			case IRJnz:
				if (is_nonzero(irparam_to_val(&s, &instr->CJmp.condition))) {
					/* bounds-check the same union member that is used as
					 * the jump target (CJmp, not Jmp) */
					if (instr->CJmp.iaddr < ir->len)
						i = ir->index[instr->CJmp.iaddr];
					else
						i = NULL;
					continue;
				}
				break;
			case IRCallInternal: {
				const BuiltinFunc *f = &builtin_funcs[instr->CallI.fid];
				size_t n_args = instr->CallI.n_args;
				/* make sure enough space for our arguments is allocated */
				if (n_args > fn_args_cap)
					fn_args = xrealloc(fn_args, sizeof(Value) * (fn_args_cap = n_args));
				/* copy arguments into buffer; the index is named `a` so it
				 * does not shadow the instruction cursor `i` */
				for (size_t a = 0; a < n_args; a++)
					fn_args[a] = *irparam_to_val(&s, &instr->CallI.args[a]);

				if (f->returns) {
					Value res;
					if (f->kind == FuncVarArgs) {
						/* variadic builtins receive the number of extra
						 * arguments beyond their minimum */
						size_t min_args = f->VarArgs.min_args;
						TRY_ELSE(res = f->VarArgs.WithRet.func(n_args - min_args, fn_args),
							{free(fn_args); stack_term(&s);});
					} else if (f->kind == FuncFixedArgs) {
						TRY_ELSE(res = f->FixedArgs.WithRet.func(fn_args),
							{free(fn_args); stack_term(&s);});
					} else
						ASSERT_UNREACHED();
					stack_assign(&s, instr->CallI.ret_addr, &res);
				} else {
					if (f->kind == FuncVarArgs) {
						size_t min_args = f->VarArgs.min_args;
						TRY_ELSE(f->VarArgs.NoRet.func(n_args - min_args, fn_args),
							{free(fn_args); stack_term(&s);});
					} else if (f->kind == FuncFixedArgs) {
						TRY_ELSE(f->FixedArgs.NoRet.func(fn_args),
							{free(fn_args); stack_term(&s);});
					} else
						ASSERT_UNREACHED();
				}
				break;
			}
			case IRArrMake: {
				size_t arr_len = instr->ArrMake.len, arr_cap = instr->ArrMake.cap;
				/* an empty array has no storage and element type TypeVoid */
				Value arr = {
					.type = TypeArr,
					.Arr = {
						.type = TypeVoid,
						.is_string = false,
						.dynamically_allocated = true,
						.vals = NULL,
						.len = arr_len,
						.cap = arr_len ? arr_cap : 0,
					},
				};
				if (arr_len) {
					/* the first item determines the element type */
					Type arr_ty = irparam_to_val(&s, &instr->ArrMake.vals[0])->type;
					void *arr_vals = xmalloc(type_size[arr_ty] * arr_cap);
					for (size_t j = 0; j < arr_len; j++) {
						Value *v = irparam_to_val(&s, &instr->ArrMake.vals[j]);
						if (v->type != arr_ty) {
							set_err("Type of array item %zu (%s) differs from array type (%s)", j, type_str[v->type], type_str[arr_ty]);
							free(arr_vals);
							free(fn_args);
							stack_term(&s);
							return;
						}
						/* pack the bare payload, type_size[arr_ty] bytes per element */
						memcpy((uint8_t*)arr_vals + type_size[arr_ty] * j, &v->Void, type_size[arr_ty]);
					}
					arr.Arr.type = arr_ty;
					arr.Arr.vals = arr_vals;
				}
				stack_assign(&s, instr->ArrMake.arr_addr, &arr);
				break;
			}
			default:
				ASSERT_UNREACHED();
		}
		i = i->next;
	}
	stack_term(&s);
	free(fn_args);
}

4
vm.h
View File

@@ -1,4 +1,8 @@
#ifndef __VM_H__
#define __VM_H__
#include "ir.h"
void run(IRList *ir, const BuiltinFunc *builtin_funcs);
#endif /* VM_H */