diff options
author | Lizzy Fleckenstein <eliasfleckenstein@web.de> | 2023-08-21 19:24:26 +0200 |
---|---|---|
committer | Lizzy Fleckenstein <eliasfleckenstein@web.de> | 2023-08-21 19:24:26 +0200 |
commit | 8648ef9dd658b4e0a71d0e188260eff071fae881 (patch) | |
tree | 349392241f562990a761a29e874bb6e381523d03 | |
parent | ecd10c643962ac1122fa016e30aee8b7796f8c45 (diff) | |
download | paradox-8648ef9dd658b4e0a71d0e188260eff071fae881.tar.xz |
I/O buffering
-rw-r--r-- | README.md | 26 | ||||
-rwxr-xr-x | bootstrap.lua | 113 | ||||
-rw-r--r-- | examples/brainfuck.false | 4 | ||||
-rw-r--r-- | paradox.false | 128 |
4 files changed, 203 insertions, 68 deletions
@@ -59,7 +59,11 @@ Due to lack of `stderr` access in FALSE, syntax errors are emitted as `%fatal` N

 ### I/O Buffering

-Paradox currently does not buffer I/O (using syscalls directly) but will do so in the future. B/ß are no-ops.
+Paradox implements buffered I/O. It uses a fixed buffer size of 8192. To change this, you can use `sed -i 's/8192/YOUR_BUFSIZE_HERE/g' paradox.false`. To find out an appropriate buffer size for your system, you can use the following command, if you have a C compiler installed:
+
+```
+echo '#include <stdio.h>\nBUFSIZ' | cpp | tail -n1
+```

 ### Inline assembly

@@ -93,20 +97,20 @@ Variables and lambdas are pointers.

 #### String pointers

-`["my_stringy"]$12+;$@21+;+\[$@$@>][1-$;,\]#%%10,` will print my_stringy in reverse (ygnirts_ym). This works with any string. This is due to the binary layout of lambdas containing a single string (consisting of just a syscall to print out the string):
+`["my_stringy"]$2+;$@11+;+\[$@$@>][1-$;,\]#%%10,` will print my_stringy in reverse (ygnirts_ym). This works with any string. This is due to the binary layout of lambdas containing a single string (consisting of just a call to write with the necessary parameters):

 ```
 0000000000401002 <fun_1>:
- 401002: b8 01 00 00 00 mov eax,0x1
- 401007: bf 01 00 00 00 mov edi,0x1
- 40100c: 48 be 1e 10 40 00 00 movabs rsi,0x40101e
- 401013: 00 00 00
- 401016: ba 0a 00 00 00 mov edx,0xa
- 40101b: 0f 05 syscall
- 40101d: c3 ret
+ 401002: 48 be 00 20 40 00 00 movabs rsi,0x402000
+ 401009: 00 00 00
+ 40100c: b9 09 00 00 00 mov ecx,0x9
+ 401011: e8 86 00 00 00 call 40109c <write>
+ 401016: c3 ret
 ```

-A pointer to the string is stored at offset 12, and the length is stored at offset 21.
+(generated by `objdump -D -M intel some_binary_here`)
+
+A pointer to the string (0x402000) is stored at offset 2, and the length of the string (0x9) is stored at offset 11.

 Strings are stored in the data section, so it is possible to write to them.

@@ -116,7 +120,7 @@ It is possible to make memory allocations using strings by compiling your progra (echo "[\"$(head -c YOUR_ALLOCATION_SIZE /dev/zero)\"]" && cat your_source_file.false) | ./paradox ``` -In the program, you can then use `12+;` at the beginning of the file to extract a pointer to your allocation. +In the program, you can then use `2+;` at the beginning of the file to extract a pointer to your allocation. Since all operations fetch 64-bits, it is recommended to set the allocation size to 7 bytes higher than desired (if you wish to fetch/write the last few bytes of the allocation individually). diff --git a/bootstrap.lua b/bootstrap.lua index f7308fa..aec4b0e 100755 --- a/bootstrap.lua +++ b/bootstrap.lua @@ -110,31 +110,22 @@ mov rax, [rax] mov [r12], rax ]], [","] = [[ -mov rax, 1 -mov rdi, 1 mov rsi, r12 -mov rdx, 1 -syscall +mov rcx, 1 +call write add r12, 8 ]], ["^"] = [[ +call read sub r12, 8 -mov qword[r12], 0 -mov rax, 0 -mov rdi, 0 -mov rsi, r12 -mov rdx, 1 -syscall -mov ebx, [r12] -mov ecx, -1 -cmp rax, 0 -cmove ebx, ecx -mov [r12], ebx +mov [r12], eax ]], ["."] = [[ call print_num ]], - ["B"] = "", + ["B"] = [[ +call flush +]], } local fn_counter = 0 @@ -210,11 +201,9 @@ local function compile_fn() end table.insert(str, x:byte(1)) end - print("mov rax, 1") - print("mov rdi, 1") print("mov rsi, str_" .. str_counter) - print("mov rdx, " .. #str) - print("syscall") + print("mov rcx, " .. #str) + print("call write") strings = "str_" .. str_counter .. ": db " .. table.concat(str, ",") .. "\n" .. 
strings str_counter = str_counter + 1 c = nil @@ -283,6 +272,81 @@ print("section .text") compile_fn() io.write([[ +section .data +readbuf_len: dq 0 +readbuf_cursor: dq 0 +writebuf_len: dq 0 +section .bss +readbuf: resb 8192 +writebuf: resb 8192 +section .text +read: +mov rax, [readbuf_cursor] +mov rbx, [readbuf_len] +cmp rax, rbx +jb .has +cmp rbx, 8192 +jb .fill +xor rbx, rbx +mov [readbuf_len], rbx +mov [readbuf_cursor], rbx +.fill: +mov rax, 0 +mov rdi, 0 +lea rsi, [rbx+readbuf] +mov rdx, 8192 +sub rdx, rbx +syscall +add [readbuf_len], rax +cmp rax, 0 +jne .has +mov eax, -1 +ret +.has: +mov rax, [readbuf_cursor] +movzx eax, byte[readbuf+rax] +inc qword[readbuf_cursor] +ret +write: +mov rdi, [writebuf_len] +mov rax, 8192 +sub rax, rdi +add rdi, writebuf +mov rdx, rcx +sub rdx, rax +jna .simple +mov rcx, rax +rep movsb +push rsi +push rdx +mov qword[writebuf_len], 8192 +call flush +pop rdx +pop rsi +cmp rdx, 8192 +ja .direct +mov rcx, rdx +mov rdi, writebuf +.simple: +add [writebuf_len], rcx +rep movsb +ret +.direct: +mov rax, 1 +mov rdi, 1 +syscall +ret +flush: +mov rdx, [writebuf_len] +cmp rdx, 0 +je .return +mov rax, 1 +mov rdi, 1 +mov rsi, writebuf +syscall +mov qword[writebuf_len], 0 +.return: +ret conditional: add r12, 16 mov eax, [r12-8] @@ -332,12 +396,10 @@ jle .print dec rcx mov byte[rcx], '-' .print: -mov rax, 1 -mov rdi, 1 mov rsi, rcx -lea rdx, [rsp+16] -sub rdx, rcx -syscall +lea rcx, [rsp+16] +sub rcx, rsi +call write add rsp, 16 ret ]]) @@ -347,6 +409,7 @@ global _start _start: lea r12, [stack+8*1000000] call fun_0 +call flush mov rax, 60 mov rdi, 0 syscall diff --git a/examples/brainfuck.false b/examples/brainfuck.false index 0ba9adf..ff603ae 100644 --- a/examples/brainfuck.false +++ b/examples/brainfuck.false @@ -14,7 +14,7 @@ example hello world brainfuck program to try: echo '+[>>>->-[>->----<<<]>>]>.---.>+..+++.>>.<.>>---.<<<.+++.------.<-.>>+.' 
| ./brainfuck } -12+;$$t:h:l: +2+;$$t:h:l: 0d: { change this to 0d: to disable debugging, 1_d: to enable debugging } 0[^$$1_=~\0=~&][\1+]#% $n: [ @@ -51,7 +51,7 @@ $'<=[t;$l;=[%l;30000+]?1-t:]? $'+=[t;$;$255~&\1+255&|\:]? $'-=[t;$;$255~&\1-255&|\:]? - $'.=[t;;,]? + $'.=[t;;,ß]? $',=[t;$;255~&^|\:]? $'[=[t;;255&0=[1_b;!]?]? $']=[t;;255&0=~[1b;!]?]? diff --git a/paradox.false b/paradox.false index a2206e1..52cf9fe 100644 --- a/paradox.false +++ b/paradox.false @@ -138,11 +138,9 @@ $ 1+ s: { increment str_counter } { emit print string } - "mov rax, 1" 10, - "mov rdi, 1" 10, "mov rsi, str_" 0 ø . 10, - "mov rdx, " 1 ø . 10, - "syscall" 10, + "mov rcx, " 1 ø . 10, + "call write" 10, 1_ c: { consume current_char } ]? @@ -390,29 +388,18 @@ mov [r12], rax { elseif c = , } c;',= $[%~1_\]?[ -"mov rax, 1 -mov rdi, 1 -mov rsi, r12 -mov rdx, 1 -syscall +"mov rsi, r12 +mov rcx, 1 +call write add r12, 8 " 1_ c:]? { elseif c = ^ } c;'^= $[%~1_\]?[ -"sub r12, 8 -mov qword[r12], 0 -mov rax, 0 -mov rdi, 0 -mov rsi, r12 -mov rdx, 1 -syscall -mov ebx, [r12] -mov ecx, -1 -cmp rax, 0 -cmove ebx, ecx -mov [r12], ebx +"call read +sub r12, 8 +mov [r12], eax " 1_ c:]? @@ -423,7 +410,10 @@ mov [r12], ebx 1_ c:]? { elseif c = B } - c;'B= $[%~1_\]?[1_ c:]? + c;'B= $[%~1_\]?[ +"call flush +" + 1_ c:]? { else error } ~["unknown character: "c;,]e;! @@ -480,6 +470,85 @@ x;! { call compile_fn } { if state != ERROR } q;2=~[ +{ buffered I/O } +8192 +"section .data +readbuf_len: dq 0 +readbuf_cursor: dq 0 +writebuf_len: dq 0 +section .bss +readbuf: resb "$." +writebuf: resb "$." +section .text +read: +mov rax, [readbuf_cursor] +mov rbx, [readbuf_len] +cmp rax, rbx +jb .has +cmp rbx, "$." +jb .fill +xor rbx, rbx +mov [readbuf_len], rbx +mov [readbuf_cursor], rbx +.fill: +mov rax, 0 +mov rdi, 0 +lea rsi, [rbx+readbuf] +mov rdx, "$." 
+sub rdx, rbx +syscall +add [readbuf_len], rax +cmp rax, 0 +jne .has +mov eax, -1 +ret +.has: +mov rax, [readbuf_cursor] +movzx eax, byte[readbuf+rax] +inc qword[readbuf_cursor] +ret +write: +mov rdi, [writebuf_len] +mov rax, "$." +sub rax, rdi +add rdi, writebuf +mov rdx, rcx +sub rdx, rax +jna .simple +mov rcx, rax +rep movsb +push rsi +push rdx +mov qword[writebuf_len], "$." +call flush +pop rdx +pop rsi +cmp rdx, "$." +ja .direct +mov rcx, rdx +mov rdi, writebuf +.simple: +add [writebuf_len], rcx +rep movsb +ret +.direct: +mov rax, 1 +mov rdi, 1 +syscall +ret +flush: +mov rdx, [writebuf_len] +cmp rdx, 0 +je .return +mov rax, 1 +mov rdi, 1 +mov rsi, writebuf +syscall +mov qword[writebuf_len], 0 +.return: +ret +"% + { builtin functions } "conditional: add r12, 16 @@ -530,29 +599,28 @@ jle .print dec rcx mov byte[rcx], '-' .print: -mov rax, 1 -mov rdi, 1 mov rsi, rcx -lea rdx, [rsp+16] -sub rdx, rcx -syscall +lea rcx, [rsp+16] +sub rcx, rsi +call write add rsp, 16 ret " -{ emit setup and stack } +{ emit _start and stack } 1000000 "global _start _start: lea r12, [stack+8*"$."] call fun_0 +call flush mov rax, 60 mov rdi, 0 syscall section .bss -stack: resq "." -" +stack: resq "$." +"% { emit variables } |