1 // Copyright © 2023 Simon Forman
3 // This file is part of Thun
5 // Thun is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
10 // Thun is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Thun. If not see <http://www.gnu.org/licenses/>.
18 #include <uvm/syscalls.h>
21 ███████╗██████╗ ██████╗ ██████╗ ██████╗
22 ██╔════╝██╔══██╗██╔══██╗██╔═══██╗██╔══██╗
23 █████╗ ██████╔╝██████╔╝██║ ██║██████╔╝
24 ██╔══╝ ██╔══██╗██╔══██╗██║ ██║██╔══██╗
25 ███████╗██║ ██║██║ ██║╚██████╔╝██║ ██║
26 ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝
28 ██╗ ██╗ █████╗ ███╗ ██╗██████╗ ██╗ ██╗███╗ ██╗ ██████╗
29 ██║ ██║██╔══██╗████╗ ██║██╔══██╗██║ ██║████╗ ██║██╔════╝
30 ███████║███████║██╔██╗ ██║██║ ██║██║ ██║██╔██╗ ██║██║ ███╗
31 ██╔══██║██╔══██║██║╚██╗██║██║ ██║██║ ██║██║╚██╗██║██║ ██║
32 ██║ ██║██║ ██║██║ ╚████║██████╔╝███████╗██║██║ ╚████║╚██████╔╝
33 ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚═════╝ ╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
35 No setjmp/longjmp, so let's have a global error value and check it after ops.
41 #define UNKNOWN_WORD_ERROR 1
42 #define MISSING_CLOSING_BRACKET 2
43 #define EXTRA_CLOSING_BRACKET 3
44 #define CONS_HEAP_OOM 4
45 #define STRING_HEAP_OOM 5
46 #define NOT_ENOUGH_VALUES_ON_STACK 6
48 #define CANNOT_TAKE_FIRST_OF_EMPTY_LIST 8
49 #define CANNOT_TAKE_REST_OF_EMPTY_LIST 9
52 #define CHECK_ERROR if (error != NO_ERROR) return 0;
56 char *error_messages[3] = {
59 "Missing closing bracket"
63 #define PRINT_I64(message, number) { print_str(message); print_i64(number); print_endl(); }
67 ██████╗ ██████╗ ███╗ ██╗███████╗ ██╗ ██╗███████╗ █████╗ ██████╗
68 ██╔════╝██╔═══██╗████╗ ██║██╔════╝ ██║ ██║██╔════╝██╔══██╗██╔══██╗
69 ██║ ██║ ██║██╔██╗ ██║███████╗ ███████║█████╗ ███████║██████╔╝
70 ██║ ██║ ██║██║╚██╗██║╚════██║ ██╔══██║██╔══╝ ██╔══██║██╔═══╝
71 ╚██████╗╚██████╔╝██║ ╚████║███████║ ██║ ██║███████╗██║ ██║██║
72 ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝
75 We don't have Unions, Enums, or Typedefs. So how do we represent Joy types?
76 In SICP they use a pair of arrays of pointers, one for heads and one
79 > A pointer to a pair is an index into the two vectors.
83 #define HEAP_SIZE 1024
89 // cell 0 is reserved so that 0 can be the empty list.
92 // > We also need a representation for objects other than pairs (such as
93 // > numbers and symbols) and a way to distinguish one kind of data from
94 // > another. There are many methods of accomplishing this, but they all
95 // > reduce to using typed pointers, that is, to extending the notion of
96 // > ``pointer'' to include information on data type.
98 // Let's use u32 with the two MSB's for the type tag.
100 #define TYPE_OF(pointer) (pointer >> 30)
101 #define VALUE_OF(pointer) (pointer & 0x3fffffff)
102 #define JOY_VALUE(type, value) ((type & 3) << 30) | (value & 0x3fffffff)
105 This means that our ints are restricted to 30 bits for now, until
109 In the Thun dialect of Joy we have four types of values:
111 Integers, Booleans, Symbols, and Lists.
118 // Because the type tag for lists is 0 the empty list is just 0;
122 cons(u32 head, u32 tail)
124 if (free >= HEAP_SIZE) {
125 error = CONS_HEAP_OOM;
130 u32 cell = JOY_VALUE(joyList, free);
135 u32 head(u32 list) { return heads[VALUE_OF(list)]; }
136 u32 tail(u32 list) { return tails[VALUE_OF(list)]; }
141 ███████╗████████╗██████╗ ██╗███╗ ██╗ ██████╗
142 ██╔════╝╚══██╔══╝██╔══██╗██║████╗ ██║██╔════╝
143 ███████╗ ██║ ██████╔╝██║██╔██╗ ██║██║ ███╗
144 ╚════██║ ██║ ██╔══██╗██║██║╚██╗██║██║ ██║
145 ███████║ ██║ ██║ ██║██║██║ ╚████║╚██████╔╝
146 ╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═════╝
148 ██╗ ██╗███████╗ █████╗ ██████╗
149 ██║ ██║██╔════╝██╔══██╗██╔══██╗
150 ███████║█████╗ ███████║██████╔╝
151 ██╔══██║██╔══╝ ██╔══██║██╔═══╝
152 ██║ ██║███████╗██║ ██║██║
153 ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝
154 Simple string storage heap.
156 We need a place to keep symbol strings.
160 #define STRING_HEAP_SIZE 100000
162 char string_heap[STRING_HEAP_SIZE];
163 u32 string_heap_top = 0;
166 allocate_string(char *buffer, u32 offset, u32 length)
168 u64 end = string_heap_top + length + 1;
169 // I have already spent more time thinking about avoiding overflow
170 // from the line above, going into the comparison below and goofing
171 // up for "pathological" inputs (string_heap_top + length == MAX_INT_64
172 // or whatever it's called. In practice, this should never happen,
173 // and we spell that with assert, eh? I'm not going to add it now,
174 // but it would be something like assert(string_heap_top + length < MAX_INT_64)
175 if (end >= STRING_HEAP_SIZE) {
176 error = STRING_HEAP_OOM;
179 memcpy(string_heap + string_heap_top, buffer + offset, length);
180 string_heap[end] = '\0';
181 u32 new_string = string_heap_top;
182 string_heap_top = (u32)end + 1;
183 //print_str("allocating ");print_str(string_heap + new_string);print_endl();
184 return string_heap + new_string;
189 ██████╗ ██████╗ ██╗███╗ ██╗████████╗███████╗██████╗
190 ██╔══██╗██╔══██╗██║████╗ ██║╚══██╔══╝██╔════╝██╔══██╗
191 ██████╔╝██████╔╝██║██╔██╗ ██║ ██║ █████╗ ██████╔╝
192 ██╔═══╝ ██╔══██╗██║██║╚██╗██║ ██║ ██╔══╝ ██╔══██╗
193 ██║ ██║ ██║██║██║ ╚████║ ██║ ███████╗██║ ██║
194 ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝╚═╝ ╚═╝
199 print_joy_value(u32 jv)
201 u8 type = TYPE_OF(jv);
202 if (type == joyInt) {
203 print_i64(VALUE_OF(jv));
204 } else if (type == joyBool) {
205 print_str(VALUE_OF(jv) ? "true" : "false");
206 } else if (type == joyList) {
210 } else if (type == joySymbol) {
211 char *str = ht_lookup(VALUE_OF(jv));
212 if (error != NO_ERROR)
219 print_joy_list(u32 list)
222 print_joy_value(head(list));
223 if (error != NO_ERROR)
234 ██╗ ██╗ █████╗ ███████╗██╗ ██╗
235 ██║ ██║██╔══██╗██╔════╝██║ ██║
236 ███████║███████║███████╗███████║
237 ██╔══██║██╔══██║╚════██║██╔══██║
238 ██║ ██║██║ ██║███████║██║ ██║
239 ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝
241 ████████╗ █████╗ ██████╗ ██╗ ███████╗
242 ╚══██╔══╝██╔══██╗██╔══██╗██║ ██╔════╝
243 ██║ ███████║██████╔╝██║ █████╗
244 ██║ ██╔══██║██╔══██╗██║ ██╔══╝
245 ██║ ██║ ██║██████╔╝███████╗███████╗
246 ╚═╝ ╚═╝ ╚═╝╚═════╝ ╚══════╝╚══════╝
247 And now for a hash table.
249 This table maps between hashes of symbol strings which are used in the
250 tagged pointers in Joy values and strings which are stored in the string
253 TODO: bool ht_has(char *str, u32 index, u32 length) to see if a fragment
254 of a string buffer is a symbol in the hash table.
258 https://benhoyt.com/writings/hash-table-in-c/#hash-tables
259 https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
263 #define FNV_OFFSET 0xcbf29ce484222325
264 #define FNV_PRIME 0x100000001b3
269 u64 hash = FNV_OFFSET;
270 for (char* p = key; *p; ++p) {
271 hash = hash ^ (u64)(unsigned char)(*p);
272 hash = hash * FNV_PRIME;
278 hash_fragment(char *str, u32 index, u32 length)
280 u64 hash = FNV_OFFSET;
281 for (char* p = (str + index); length; --length, ++p) {
282 hash = hash ^ (u64)(unsigned char)(*p);
283 hash = hash * FNV_PRIME;
288 // Capacity is a power of two (10 for now.)
290 #define CAPACITY 1024
291 #define HASH_MASK 1023
293 // Note that there's no checking for filling the table and expanding.
294 // For now, I'm just going to use a "large enough" table and hope
295 // for the best. (We have thirty bits to work with so the obvious
296 // thing to do is make the exponent fifteen, half for the hash key
297 // and half for the increment.)
299 char* hash_table[CAPACITY];
302 ht_insert(char *symbol)
304 u64 hash = hash_key(symbol);
305 u32 index = hash % CAPACITY;
307 char *candidate = hash_table[index];
309 hash_table[index] = symbol;
310 return JOY_VALUE(joySymbol, VALUE_OF(hash));
313 // https://en.wikipedia.org/wiki/Double_hashing
314 // Rather than use another hash function I'm going to try
315 // using the extra bits of the same hash.
316 u32 increment = ((VALUE_OF(hash) >> EXPONENT) | 1) % CAPACITY;
317 // If I understand correctly, making the increment odd
318 // means it will traverse the whole (even-sized) table.
320 // Compare pointers then hashes (since we already have
321 // one hash I'm guessing that that's cheaper or at least
322 // no more expensive than string comparision.)
323 if (candidate == symbol || hash == hash_key(candidate))
325 index = (index + increment) % CAPACITY;
326 candidate = hash_table[index];
329 hash_table[index] = symbol;
331 return JOY_VALUE(joySymbol, VALUE_OF(hash));
337 // Note that hash must be truncated to N (N=30 as it happens) bits
339 u32 index = hash % CAPACITY;
340 char *candidate = hash_table[index];
341 u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY;
343 if (hash == VALUE_OF(hash_key(candidate)))
345 index = (index + increment) % CAPACITY;
346 candidate = hash_table[index];
348 error = UNKNOWN_WORD_ERROR;
354 ht_has(char *str, u32 index, u32 length)
356 u32 hash = VALUE_OF(hash_fragment(str, index, length));
358 if (UNKNOWN_WORD_ERROR == error) {
369 /******************************************************************************/
377 error = NOT_ENOUGH_VALUES_ON_STACK;
387 u32 list = pop_any(stack);
389 if (TYPE_OF(list) != joyList) {
398 push_symbol(char *symbol, u32 stack)
400 return cons(JOY_VALUE(joySymbol, ht_insert(symbol)), stack);
405 push_int(u32 n, u32 stack)
407 return cons(JOY_VALUE(joyInt, n), stack);
411 /******************************************************************************/
414 is_integer(char *str, u32 index, u32 length)
417 for (;length; --length) {
418 char ch = *(str + length - 1);
419 if (ch < '0' || ch > '9') return 0;
425 convert_integer(char *str, u32 index, u32 length)
428 length = length + index;
429 for (; index < length; ++index) {
430 char ch = *(str + index);
431 result = result * 10 + ((u8)ch - (u8)'0');
433 //print_str("converted integer ");print_i64(result);print_endl();
434 return JOY_VALUE(joyInt, result);
437 /******************************************************************************/
441 ██████╗ █████╗ ██████╗ ███████╗███████╗██████╗
442 ██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝██╔══██╗
443 ██████╔╝███████║██████╔╝███████╗█████╗ ██████╔╝
444 ██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ██╔══██╗
445 ██║ ██║ ██║██║ ██║███████║███████╗██║ ██║
446 ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═╝
453 intern(char *str, u32 index, u32 length)
455 u32 symbol_hash = ht_has(str, index, length);
457 char *token = allocate_string(str, index, length);
458 if (error != NO_ERROR) {
459 //print_str("a. Error code: ");print_i64(error);print_endl();
462 symbol_hash = ht_insert(token);
464 return JOY_VALUE(joySymbol, symbol_hash);
469 tokenate(char *str, u32 index, u32 length)
472 && *(str + index) == 't'
473 && *(str + index + 1) == 'r'
474 && *(str + index + 2) == 'u'
475 && *(str + index + 3) == 'e'
477 //print_str("tokenate true");print_endl();
478 return JOY_VALUE(joyBool, 1);
481 && *(str + index) == 'f'
482 && *(str + index + 1) == 'a'
483 && *(str + index + 2) == 'l'
484 && *(str + index + 3) == 's'
485 && *(str + index + 4) == 'e'
487 //print_str("tokenate false");print_endl();
488 return JOY_VALUE(joyBool, 0);
490 if (is_integer(str, index, length)) {
491 //print_str("tokenate integer");print_endl();
492 return convert_integer(str, index, length);
494 return intern(str, index, length);
499 is_delimiter(char ch)
501 return ch == '[' || ch == ']' || ch == ' ';
504 // Store in-progress lists. Here as in the hash table I'm not checking
505 // for capacity overload or anything like that. If you think you're going
506 // to parse more than a five hundred '[' chars then increase the size of
509 u32 t2e_stack_top = 0;
511 #define T2E_PUSH(thing) t2e_stack[t2e_stack_top] = (thing); ++t2e_stack_top; (thing) = empty_list;
512 #define T2E_POP(thing) if (!t2e_stack_top) { error = EXTRA_CLOSING_BRACKET; return 0; }; --t2e_stack_top; (thing) = t2e_stack[t2e_stack_top];
515 text_to_expression(char *str)
518 u32 end = empty_list;
519 u32 top = empty_list;
520 u32 tok = empty_list;
521 u64 str_length = strlen(str); // TODO: rewrite so we don't iterate through the string twice.
522 while (index < str_length) {
523 char ch = str[index];
528 if ('[' == ch) { // start new list
534 if (']' == ch) { // finish last new list
541 for (; i < str_length && !is_delimiter(str[i]); ++i) {}
542 tok = tokenate(str, index, i - index);
545 u32 cell = cons(tok, empty_list);
546 if (end) tails[end] = cell;
547 if (!top) top = cell;
551 error = MISSING_CLOSING_BRACKET;
559 In order to return two "pointers" I'm going to just OR them
560 into one u64 value. It might be conceptually cleaner to define
561 an array of two u32 values, eh?
565 I'll figure it out later, I just want to get something going for now.
566 I also don't want to take the time at the moment to figure out if/how
567 to call function pointers in NCC C, so a chain of if..else is the
571 #define MATCH(name) if (!strcmp(symbol, (name)))
574 joy_eval(char *symbol, u32 stack, u32 expression)
576 MATCH("clear") return (u64)expression;
578 //return i_joy_combinator(stack, expression);
580 u32 list = pop_list(stack); CHECK_ERROR
582 stack = joy(stack, list); CHECK_ERROR
584 else MATCH("concat") { stack = concat(stack); }
585 else MATCH("cons") { stack = cons_joy_func(stack); }
586 else MATCH("dup") { stack = dup(stack); }
587 else MATCH("first") { stack = first(stack); }
588 else MATCH("pop") { stack = pop(stack); }
589 else MATCH("rest") { stack = rest(stack); }
590 else MATCH("stack") { stack = cons(stack, stack); }
591 else MATCH("swaack") { stack = swaack(stack); }
592 else MATCH("swap") { stack = swap(stack); }
593 // i, dip, branch, loop ...
594 //else MATCH("") { stack = (stack); }
596 //print_str(symbol);print_endl();
597 return (u64)stack << 32 | expression;
602 i_joy_combinator(u32 stack, u32 expression)
604 u32 list = pop_list(stack); CHECK_ERROR
606 // And here we now have to implement expression-as-list-of-lists.
607 expression = cons(list, expression); CHECK_ERROR
608 return (u64)stack << 32 | expression;
616 cons_joy_func(u32 stack)
618 u32 list = pop_list(stack); CHECK_ERROR
620 u32 tos = pop_any(stack); CHECK_ERROR
622 list = cons(tos, list); CHECK_ERROR
623 stack = cons(list, stack); CHECK_ERROR
631 u32 list_tail = pop_list(stack); CHECK_ERROR
633 u32 list_head = pop_list(stack); CHECK_ERROR
635 u32 result = empty_list;
636 if (!list_tail) { result = list_head; } else
637 if (!list_head) { result = list_tail; } else
639 result = cons(head(list_head), empty_list); CHECK_ERROR
640 list_head = tail(list_head);
641 u32 current_cell = result;
643 u32 next_cell = cons(head(list_head), empty_list); CHECK_ERROR
644 list_head = tail(list_head);
645 current_cell = tails[current_cell] = next_cell;
647 tails[current_cell] = list_tail;
649 stack = cons(result, stack); CHECK_ERROR
657 u32 list = pop_list(stack); CHECK_ERROR
658 stack = cons(tail(stack), list); CHECK_ERROR
666 u32 list = pop_list(stack); CHECK_ERROR
668 error = CANNOT_TAKE_FIRST_OF_EMPTY_LIST;
671 stack = cons(head(list), tail(stack)); CHECK_ERROR
679 u32 list = pop_list(stack); CHECK_ERROR
681 error = CANNOT_TAKE_REST_OF_EMPTY_LIST;
684 stack = cons(tail(list), tail(stack)); CHECK_ERROR
693 error = NOT_ENOUGH_VALUES_ON_STACK;
703 u32 tos = pop_any(stack); CHECK_ERROR
704 stack = cons(tos, stack); CHECK_ERROR
712 u32 tos = pop_any(stack); CHECK_ERROR
714 u32 second = pop_any(stack); CHECK_ERROR
716 stack = cons(tos, stack); CHECK_ERROR
717 stack = cons(second, stack); CHECK_ERROR
723 joy(u32 stack, u32 expression)
727 term = head(expression);
728 expression = tail(expression);
729 if (TYPE_OF(term) == joySymbol) {
730 char *symbol = ht_lookup(VALUE_OF(term)); CHECK_ERROR
731 u64 new_state = joy_eval(symbol, stack, expression); CHECK_ERROR
732 stack = new_state >> 32;
733 expression = new_state & 0xffffffff;
735 else { stack = cons(term, stack); CHECK_ERROR }
744 memset(hash_table, 0, sizeof(hash_table));
745 memset(string_heap, 0, sizeof(string_heap));
746 memset(t2e_stack, 0, sizeof(t2e_stack));
749 // TODO: these should be global.
750 u32 joy_true = JOY_VALUE(joyBool, 1);
751 u32 joy_false = JOY_VALUE(joyBool, 0);
754 u32 stack = empty_list;
755 stack = push_int(23, stack);
756 stack = cons(joy_true, stack);
757 stack = push_int(42, stack);
758 stack = push_symbol("cats", stack);
760 el = push_int(48, el);
762 stack = cons(el, stack);
763 stack = cons(joy_false, stack);
764 stack = push_int(273, stack);
765 print_joy_list(stack);
769 u32 expression = text_to_expression("1 2 3 stack i 23 18");
770 //u32 expression = text_to_expression("1 2 3 stack rest first [] cons cons [99 888 7] concat");
771 //u32 expression = text_to_expression("1 2 3 clear 4 5 6");
772 //u32 expression = text_to_expression(" 1[2[true 3][aa[aa bb] aa bb cc]bob]false[]bob 3[4] ga[]ry");
773 print_joy_list(expression);
775 u32 stack = joy(empty_list, expression);
776 if (error) PRINT_I64("error: ", error) else {
777 print_joy_list(stack);