aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLizzy Fleckenstein <eliasfleckenstein@web.de>2023-08-21 19:24:26 +0200
committerLizzy Fleckenstein <eliasfleckenstein@web.de>2023-08-21 19:24:26 +0200
commit8648ef9dd658b4e0a71d0e188260eff071fae881 (patch)
tree349392241f562990a761a29e874bb6e381523d03
parentecd10c643962ac1122fa016e30aee8b7796f8c45 (diff)
downloadparadox-8648ef9dd658b4e0a71d0e188260eff071fae881.tar.xz
I/O buffering
-rw-r--r--README.md26
-rwxr-xr-xbootstrap.lua113
-rw-r--r--examples/brainfuck.false4
-rw-r--r--paradox.false128
4 files changed, 203 insertions, 68 deletions
diff --git a/README.md b/README.md
index df398e3..2282091 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,11 @@ Due to lack of `stderr` access in FALSE, syntax errors are emitted as `%fatal` N
### I/O Buffering
-Paradox currently does not buffer I/O (using syscalls directly) but will do so in the future. B/ß are no-ops.
+Paradox implements buffered I/O. It uses a fixed buffer size of 8192 bytes for both the read buffer and the write buffer. To change this, you can use `sed -i 's/8192/YOUR_BUFSIZE_HERE/g' paradox.false`. To find an appropriate buffer size for your system, you can use the following command if you have a C compiler installed:
+
+```
+printf '#include <stdio.h>\nBUFSIZ\n' | cpp | tail -n1
+```
### Inline assembly
@@ -93,20 +97,20 @@ Variables and lambdas are pointers.
#### String pointers
-`["my_stringy"]$12+;$@21+;+\[$@$@>][1-$;,\]#%%10,` will print my_stringy in reverse (ygnirts_ym). This works with any string. This is due to the binary layout of lambdas containing a single string (consisting of just a syscall to print out the string):
+`["my_stringy"]$2+;$@11+;+\[$@$@>][1-$;,\]#%%10,` will print my_stringy in reverse (ygnirts_ym). This works with any string. This is due to the binary layout of lambdas containing a single string (consisting of just a call to write with the necessary parameters):
```
0000000000401002 <fun_1>:
- 401002: b8 01 00 00 00 mov eax,0x1
- 401007: bf 01 00 00 00 mov edi,0x1
- 40100c: 48 be 1e 10 40 00 00 movabs rsi,0x40101e
- 401013: 00 00 00
- 401016: ba 0a 00 00 00 mov edx,0xa
- 40101b: 0f 05 syscall
- 40101d: c3 ret
+ 401002: 48 be 00 20 40 00 00 movabs rsi,0x402000
+ 401009: 00 00 00
+ 40100c: b9 09 00 00 00 mov ecx,0x9
+ 401011: e8 86 00 00 00 call 40109c <write>
+ 401016: c3 ret
```
-A pointer to the string is stored at offset 12, and the length is stored at offset 21.
+(generated by `objdump -D -M intel some_binary_here`)
+
+A pointer to the string (0x402000) is stored at offset 2, and the length of the string (0x9) is stored at offset 11.
Strings are stored in the data section, so it is possible to write to them.
@@ -116,7 +120,7 @@ It is possible to make memory allocations using strings by compiling your progra
(echo "[\"$(head -c YOUR_ALLOCATION_SIZE /dev/zero)\"]" && cat your_source_file.false) | ./paradox
```
-In the program, you can then use `12+;` at the beginning of the file to extract a pointer to your allocation.
+In the program, you can then use `2+;` at the beginning of the file to extract a pointer to your allocation.
Since all operations fetch 64-bits, it is recommended to set the allocation size to 7 bytes higher than desired (if you wish to fetch/write the last few bytes of the allocation individually).
diff --git a/bootstrap.lua b/bootstrap.lua
index f7308fa..aec4b0e 100755
--- a/bootstrap.lua
+++ b/bootstrap.lua
@@ -110,31 +110,22 @@ mov rax, [rax]
mov [r12], rax
]],
[","] = [[
-mov rax, 1
-mov rdi, 1
mov rsi, r12
-mov rdx, 1
-syscall
+mov rcx, 1
+call write
add r12, 8
]],
["^"] = [[
+call read
sub r12, 8
-mov qword[r12], 0
-mov rax, 0
-mov rdi, 0
-mov rsi, r12
-mov rdx, 1
-syscall
-mov ebx, [r12]
-mov ecx, -1
-cmp rax, 0
-cmove ebx, ecx
-mov [r12], ebx
+mov [r12], rax ; read leaves rax zero-extended from eax; store all 8 bytes so the upper half of the slot holds no stale data (the removed code zeroed it explicitly)
]],
["."] = [[
call print_num
]],
- ["B"] = "",
+ ["B"] = [[
+call flush
+]],
}
local fn_counter = 0
@@ -210,11 +201,9 @@ local function compile_fn()
end
table.insert(str, x:byte(1))
end
- print("mov rax, 1")
- print("mov rdi, 1")
print("mov rsi, str_" .. str_counter)
- print("mov rdx, " .. #str)
- print("syscall")
+ print("mov rcx, " .. #str)
+ print("call write")
strings = "str_" .. str_counter .. ": db " .. table.concat(str, ",") .. "\n" .. strings
str_counter = str_counter + 1
c = nil
@@ -283,6 +272,81 @@ print("section .text")
compile_fn()
io.write([[
+section .data
+readbuf_len: dq 0
+readbuf_cursor: dq 0
+writebuf_len: dq 0
+section .bss
+readbuf: resb 8192
+writebuf: resb 8192
+section .text
+read: ; buffered stdin read: returns the next byte zero-extended in eax, or -1 on EOF or read error; clobbers rax, rbx, rcx, rdx, rsi, rdi, r11
+mov rax, [readbuf_cursor]
+mov rbx, [readbuf_len]
+cmp rax, rbx
+jb .has ; unread bytes are still buffered
+cmp rbx, 8192
+jb .fill ; room left: append after the bytes already buffered
+xor rbx, rbx ; buffer fully consumed: restart at offset 0
+mov [readbuf_len], rbx
+mov [readbuf_cursor], rbx
+.fill:
+mov rax, 0 ; syscall number 0 = read
+mov rdi, 0 ; fd 0 = stdin
+lea rsi, [rbx+readbuf]
+mov rdx, 8192
+sub rdx, rbx ; rdx = remaining capacity of readbuf
+syscall
+cmp rax, 0
+jle .eof ; 0 = EOF, negative = -errno: must never be added to readbuf_len
+add [readbuf_len], rax
+.has:
+mov rax, [readbuf_cursor]
+movzx eax, byte[readbuf+rax]
+inc qword[readbuf_cursor]
+ret
+.eof:
+mov eax, -1
+ret
+write: ; buffered output: append rcx bytes starting at rsi to writebuf, flushing when the buffer fills up
+mov rdi, [writebuf_len]
+mov rax, 8192
+sub rax, rdi ; rax = free space left in writebuf
+add rdi, writebuf ; rdi = first free byte of writebuf
+mov rdx, rcx
+sub rdx, rax ; rdx = number of bytes that do not fit
+jna .simple ; taken when rcx <= free space (unsigned): plain copy
+mov rcx, rax
+rep movsb ; fill the buffer to the brim; rsi now points at the remainder
+push rsi ; flush overwrites rsi and rdx
+push rdx
+mov qword[writebuf_len], 8192
+call flush
+pop rdx
+pop rsi
+cmp rdx, 8192
+ja .direct ; remainder is larger than the whole buffer: bypass it
+mov rcx, rdx
+mov rdi, writebuf ; buffer is empty after flush, copy to its start
+.simple:
+add [writebuf_len], rcx
+rep movsb
+ret
+.direct:
+mov rax, 1 ; syscall number 1 = write
+mov rdi, 1 ; fd 1 = stdout; rsi/rdx already hold the remaining data and its length
+syscall ; NOTE(review): the return value is not checked, so a short write here is not retried
+ret
+flush: ; write the pending writebuf_len bytes of writebuf to stdout and mark the buffer empty; may clobber rax, rcx, rdx, rsi, rdi, r11
+mov rdx, [writebuf_len]
+mov rsi, writebuf
+.again:
+cmp rdx, 0
+je .return ; nothing (left) to write
+mov rax, 1 ; syscall number 1 = write
+mov rdi, 1 ; fd 1 = stdout
+syscall
+cmp rax, 0
+jle .return ; error or no progress: drop the rest instead of spinning
+add rsi, rax ; short write: skip the bytes the kernel accepted
+sub rdx, rax ; and retry with what is left
+jmp .again
+.return:
+mov qword[writebuf_len], 0
+ret
conditional:
add r12, 16
mov eax, [r12-8]
@@ -332,12 +396,10 @@ jle .print
dec rcx
mov byte[rcx], '-'
.print:
-mov rax, 1
-mov rdi, 1
mov rsi, rcx
-lea rdx, [rsp+16]
-sub rdx, rcx
-syscall
+lea rcx, [rsp+16]
+sub rcx, rsi
+call write
add rsp, 16
ret
]])
@@ -347,6 +409,7 @@ global _start
_start:
lea r12, [stack+8*1000000]
call fun_0
+call flush
mov rax, 60
mov rdi, 0
syscall
diff --git a/examples/brainfuck.false b/examples/brainfuck.false
index 0ba9adf..ff603ae 100644
--- a/examples/brainfuck.false
+++ b/examples/brainfuck.false
@@ -14,7 +14,7 @@
example hello world brainfuck program to try:
echo '+[>>>->-[>->----<<<]>>]>.---.>+..+++.>>.<.>>---.<<<.+++.------.<-.>>+.' | ./brainfuck
}
-12+;$$t:h:l:
+2+;$$t:h:l:
0d: { change this to 0d: to disable debugging, 1_d: to enable debugging }
0[^$$1_=~\0=~&][\1+]#% $n:
[
@@ -51,7 +51,7 @@
$'<=[t;$l;=[%l;30000+]?1-t:]?
$'+=[t;$;$255~&\1+255&|\:]?
$'-=[t;$;$255~&\1-255&|\:]?
- $'.=[t;;,]?
+ $'.=[t;;,ß]?
$',=[t;$;255~&^|\:]?
$'[=[t;;255&0=[1_b;!]?]?
$']=[t;;255&0=~[1b;!]?]?
diff --git a/paradox.false b/paradox.false
index a2206e1..52cf9fe 100644
--- a/paradox.false
+++ b/paradox.false
@@ -138,11 +138,9 @@
$ 1+ s: { increment str_counter }
{ emit print string }
- "mov rax, 1" 10,
- "mov rdi, 1" 10,
"mov rsi, str_" 0 ø . 10,
- "mov rdx, " 1 ø . 10,
- "syscall" 10,
+ "mov rcx, " 1 ø . 10,
+ "call write" 10,
1_ c: { consume current_char }
]?
@@ -390,29 +388,18 @@ mov [r12], rax
{ elseif c = , }
c;',= $[%~1_\]?[
-"mov rax, 1
-mov rdi, 1
-mov rsi, r12
-mov rdx, 1
-syscall
+"mov rsi, r12
+mov rcx, 1
+call write
add r12, 8
"
1_ c:]?
{ elseif c = ^ }
c;'^= $[%~1_\]?[
-"sub r12, 8
-mov qword[r12], 0
-mov rax, 0
-mov rdi, 0
-mov rsi, r12
-mov rdx, 1
-syscall
-mov ebx, [r12]
-mov ecx, -1
-cmp rax, 0
-cmove ebx, ecx
-mov [r12], ebx
+"call read
+sub r12, 8
+mov [r12], rax ; read leaves rax zero-extended from eax; store all 8 bytes so the upper half of the slot holds no stale data (the removed code zeroed it explicitly)
"
1_ c:]?
@@ -423,7 +410,10 @@ mov [r12], ebx
1_ c:]?
{ elseif c = B }
- c;'B= $[%~1_\]?[1_ c:]?
+ c;'B= $[%~1_\]?[
+"call flush
+"
+ 1_ c:]?
{ else error }
~["unknown character: "c;,]e;!
@@ -480,6 +470,85 @@ x;! { call compile_fn }
{ if state != ERROR }
q;2=~[
+{ buffered I/O }
+8192
+"section .data
+readbuf_len: dq 0
+readbuf_cursor: dq 0
+writebuf_len: dq 0
+section .bss
+readbuf: resb "$."
+writebuf: resb "$."
+section .text
+read: ; buffered stdin read: returns the next byte zero-extended in eax, or -1 on EOF or read error; clobbers rax, rbx, rcx, rdx, rsi, rdi, r11
+mov rax, [readbuf_cursor]
+mov rbx, [readbuf_len]
+cmp rax, rbx
+jb .has ; unread bytes are still buffered
+cmp rbx, "$."
+jb .fill ; room left: append after the bytes already buffered
+xor rbx, rbx ; buffer fully consumed: restart at offset 0
+mov [readbuf_len], rbx
+mov [readbuf_cursor], rbx
+.fill:
+mov rax, 0 ; syscall number 0 = read
+mov rdi, 0 ; fd 0 = stdin
+lea rsi, [rbx+readbuf]
+mov rdx, "$."
+sub rdx, rbx ; rdx = remaining capacity of readbuf
+syscall
+cmp rax, 0
+jle .eof ; 0 = EOF, negative = -errno: must never be added to readbuf_len
+add [readbuf_len], rax
+.has:
+mov rax, [readbuf_cursor]
+movzx eax, byte[readbuf+rax]
+inc qword[readbuf_cursor]
+ret
+.eof:
+mov eax, -1
+ret
+write: ; buffered output: append rcx bytes starting at rsi to writebuf, flushing when the buffer fills up
+mov rdi, [writebuf_len]
+mov rax, "$."
+sub rax, rdi ; rax = free space left in writebuf
+add rdi, writebuf ; rdi = first free byte of writebuf
+mov rdx, rcx
+sub rdx, rax ; rdx = number of bytes that do not fit
+jna .simple ; taken when rcx <= free space (unsigned): plain copy
+mov rcx, rax
+rep movsb ; fill the buffer to the brim; rsi now points at the remainder
+push rsi ; flush overwrites rsi and rdx
+push rdx
+mov qword[writebuf_len], "$."
+call flush
+pop rdx
+pop rsi
+cmp rdx, "$."
+ja .direct ; remainder is larger than the whole buffer: bypass it
+mov rcx, rdx
+mov rdi, writebuf ; buffer is empty after flush, copy to its start
+.simple:
+add [writebuf_len], rcx
+rep movsb
+ret
+.direct:
+mov rax, 1 ; syscall number 1 = write
+mov rdi, 1 ; fd 1 = stdout; rsi/rdx already hold the remaining data and its length
+syscall ; NOTE(review): the return value is not checked, so a short write here is not retried
+ret
+flush: ; write the pending writebuf_len bytes of writebuf to stdout and mark the buffer empty; may clobber rax, rcx, rdx, rsi, rdi, r11
+mov rdx, [writebuf_len]
+mov rsi, writebuf
+.again:
+cmp rdx, 0
+je .return ; nothing (left) to write
+mov rax, 1 ; syscall number 1 = write
+mov rdi, 1 ; fd 1 = stdout
+syscall
+cmp rax, 0
+jle .return ; error or no progress: drop the rest instead of spinning
+add rsi, rax ; short write: skip the bytes the kernel accepted
+sub rdx, rax ; and retry with what is left
+jmp .again
+.return:
+mov qword[writebuf_len], 0
+ret
+"%
+
{ builtin functions }
"conditional:
add r12, 16
@@ -530,29 +599,28 @@ jle .print
dec rcx
mov byte[rcx], '-'
.print:
-mov rax, 1
-mov rdi, 1
mov rsi, rcx
-lea rdx, [rsp+16]
-sub rdx, rcx
-syscall
+lea rcx, [rsp+16]
+sub rcx, rsi
+call write
add rsp, 16
ret
"
-{ emit setup and stack }
+{ emit _start and stack }
1000000
"global _start
_start:
lea r12, [stack+8*"$."]
call fun_0
+call flush
mov rax, 60
mov rdi, 0
syscall
section .bss
-stack: resq "."
-"
+stack: resq "$."
+"%
{ emit variables }