1#!/usr/bin/perl
2#	$OpenBSD: gen_syscall_emulator.pl,v 1.1 2023/09/03 01:43:09 afresh1 Exp $	#
3use v5.36;
4use autodie;
5
6# Copyright (c) 2023 Andrew Hewus Fresh <afresh1@openbsd.org>
7#
8# Permission to use, copy, modify, and distribute this software for any
9# purpose with or without fee is hereby granted, provided that the above
10# copyright notice and this permission notice appear in all copies.
11#
12# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
20my $includes = '/usr/include';
21
22# Because perl uses a long for every syscall argument,
23# if we are building a syscall_emulator for use by perl,
24# taking that into account make things work more consistently
25# across different OpenBSD architectures.
26# Unfortunately there doesn't appear to be an easy way
27# to make everything work "the way it was".
28use constant PERL_LONG_ARGS => 1;
29
30# See also /usr/src/sys/kern/syscalls.master
31my %syscalls = parse_syscalls(
32    "$includes/sys/syscall.h",
33    "$includes/sys/syscallargs.h",
34)->%*;
35delete $syscalls{MAXSYSCALL}; # not an actual function
36
37# The ordered list of all the headers we need
38my @headers = qw<
39	sys/syscall.h
40	stdarg.h
41	errno.h
42
43	sys/socket.h
44	sys/event.h
45	sys/futex.h
46	sys/ioctl.h
47	sys/ktrace.h
48	sys/mman.h
49	sys/mount.h
50	sys/msg.h
51	sys/poll.h
52	sys/ptrace.h
53	sys/resource.h
54	sys/select.h
55	sys/sem.h
56	sys/shm.h
57	sys/stat.h
58	sys/sysctl.h
59	sys/time.h
60	sys/uio.h
61	sys/wait.h
62
63	dirent.h
64	fcntl.h
65	sched.h
66	signal.h
67	stdlib.h
68	stdio.h
69	syslog.h
70	tib.h
71	time.h
72	unistd.h
73>;
74
75foreach my $header (@headers) {
76	my $filename = "$includes/$header";
77	open my $fh, '<', $filename;
78	my $content = do { local $/; readline $fh };
79	close $fh;
80
81	foreach my $name (sort keys %syscalls) {
82		my $s = $syscalls{$name};
83		my $func_sig = find_func_sig($content, $name, $s);
84
85		if (ref $func_sig) {
86			die "Multiple defs for $name <$header> <$s->{header}>"
87			    if $s->{header};
88			$s->{func} = $func_sig;
89			$s->{header} = $header;
90		} elsif ($func_sig) {
91			$s->{mismatched_sig} = "$func_sig <$header>";
92		}
93	}
94}
95
96say "/*\n * Generated from gen_syscall_emulator.pl\n */";
97say "#include <$_>" for @headers;
98print <<"EOL";
99#include "syscall_emulator.h"
100
101long
102syscall_emulator(int syscall, ...)
103{
104	long ret = 0;
105	va_list args;
106	va_start(args, syscall);
107
108	switch(syscall) {
109EOL
110
111foreach my $name (
112	sort { $syscalls{$a}{id} <=> $syscalls{$b}{id} } keys %syscalls
113    ) {
114	my %s = %{ $syscalls{$name} };
115
116	# Some syscalls we can't emulate, so we comment those out.
117	$s{skip} //= "Indirect syscalls not supported"
118	    if !$s{argtypes} && ($s{args}[-1] || '') eq '...';
119	$s{skip} //= "Mismatched func: $s{mismatched_sig}"
120	    if $s{mismatched_sig} and not $s{func};
121	$s{skip} //= "No signature found in headers"
122	    unless $s{header};
123
124	my $ret = $s{ret} eq 'void' ? '' : 'ret = ';
125	$ret .= '(long)' if $s{ret} eq 'void *';
126
127	my (@args, @defines);
128	my $argname = '';
129	if ($s{argtypes}) {
130		if (@{ $s{argtypes} } > 1) {
131			@defines = map {
132				my $t = $_->{type};
133				my $n = $_->{name};
134				$n = "_$n" if $n eq $name; # link :-/
135				push @args, $n;
136				PERL_LONG_ARGS
137				    ? "$t $n = ($t)va_arg(args, long);"
138				    : "$t $n = va_arg(args, $t);"
139			    } @{ $s{argtypes} };
140		} else {
141			if (@{ $s{argtypes} }) {
142				$argname = " // " . join ', ',
143				    map { $_->{name} }
144				    @{ $s{argtypes} };
145			}
146			@args = map { "va_arg(args, $_->{type})" }
147			    @{ $s{argtypes} };
148		}
149	} else {
150		@args = @{ $s{args} };
151
152		# If we didn't find args in syscallargs.h but have args
153		# we don't know how to write our function.
154		$s{skip} //= "Not found in sys/syscallargs.h"
155		    if @args;
156	}
157
158	#my $header = $s{header} ? " <$s{header}>" : '';
159
160	my $indent = "\t";
161	say "$indent/* $s{skip}" if $s{skip};
162
163	$indent .= ' *' if $s{skip};
164	say "${indent}                  $s{signature} <sys/syscall.h>"
165	    if $s{skip} && $s{skip} =~ /Mismatch/;
166
167	my $brace = @defines ? " {" : "";
168	say "${indent}case $s{define}:$brace"; # // $s{id}";
169	say "${indent}\t$_" for @defines;
170	#say "${indent}\t// $s{signature}$header";
171	say "${indent}\t$ret$name(" . join(', ', @args) . ");$argname";
172	say "${indent}\tbreak;";
173	say "${indent}}" if $brace;
174
175	say "\t */" if $s{skip};
176}
177
178print <<"EOL";
179	default:
180		ret = -1;
181		errno = ENOSYS;
182	}
183	va_end(args);
184
185	return ret;
186}
187EOL
188
189
190sub parse_syscalls($syscall, $args)
191{
192	my %s = parse_syscall_h($syscall)->%*;
193
194	my %a = parse_syscallargs_h($args)->%*;
195	$s{$_}{argtypes} = $a{$_} for grep { $a{$_} } keys %s;
196
197	return \%s;
198}
199
200sub parse_syscall_h($filename)
201{
202	my %s;
203	open my $fh, '<', $filename;
204	while (readline $fh) {
205		if (m{^/\*
206		    \s+ syscall: \s+ "(?<name>[^"]+)"
207		    \s+	 ret: \s+ "(?<ret> [^"]+)"
208		    \s+	args: \s+  (?<args>.*?)
209		    \s* \*/
210		  |
211		    ^\#define \s+ (?<define>SYS_(?<name>\S+)) \s+ (?<id>\d+)
212		}x)
213		{
214			my $name        = $+{name};
215			$s{$name}{$_}   = $+{$_} for keys %+;
216			$s{$name}{args} = [ $+{args} =~ /"(.*?)"/g ]
217			    if exists $+{args};
218		}
219	}
220	close $fh;
221
222	foreach my $name (keys %s) {
223		my %d = %{ $s{$name} };
224		next unless $d{ret}; # the MAXSYSCALL
225
226		my $ret = $d{ret};
227		my @args = @{ $d{args} || [] };
228		@args = 'void' unless @args;
229
230		if ($args[-1] ne '...') {
231			my @a;
232			for (@args) {
233				push @a, $_;
234				last if $_ eq '...';
235			}
236			@args = @a;
237		}
238
239		my $args = join ", ", @args;
240		$s{$name}{signature} = "$ret\t$name($args);" =~ s/\s+/ /gr;
241		#print "    $s{$name}{signature}\n";
242	}
243
244	return \%s;
245}
246
247sub parse_syscallargs_h($filename)
248{
249	my %args;
250
251	open my $fh, '<', $filename;
252	while (readline $fh) {
253		if (my ($syscall) = /^struct \s+ sys_(\w+)_args \s+ \{/x) {
254			$args{$syscall} = [];
255			while (readline $fh) {
256				last if /^\s*\};\s*$/;
257				if (/syscallarg
258				    \(  (?<type> [^)]+ ) \)
259				    \s+ (?<name>   \w+ ) \s* ;
260				/x) {
261					push @{$args{$syscall}}, {%+};
262				}
263			}
264		}
265	}
266	close $fh;
267
268	return \%args;
269}
270
271sub find_func_sig($content, $name, $s)
272{
273	my $re = $s->{re} //= qr{^
274		(?<ret> \S+ (?: [^\S\n]+ \S+)? ) [^\S\n]* \n?
275		\b \Q$name\E \( (?<args> [^)]* ) \)
276		[^;]*;
277	    }xms;
278
279	$content =~ /$re/ || return !!0;
280	my $ret  = $+{ret};
281	my $args = $+{args};
282
283	for ($ret, $args) {
284		s/^\s+//;
285		s/\s+$//;
286		s/\s+/ /g;
287	}
288
289	# The actual functions may have this extra annotation
290	$args =~ s/\*\s*__restrict/*/g;
291
292	my %func_sig = ( ret => $ret, args => [ split /\s*,\s*/, $args ] );
293
294	return "$ret $name($args);" =~ s/\s+/ /gr
295	    unless sigs_match($s, \%func_sig);
296
297	return \%func_sig;
298}
299
300# Tests whether two types are equivalent.
301# Sometimes there are two ways to represent the same thing
302# and it seems the functions and the syscalls
303# differ a fair amount.
304sub types_match($l, $r)
305{
306	state %m = (
307	    caddr_t         => 'char *',
308	    idtype_t        => 'int',
309	    nfds_t          => 'u_int',
310	    __off_t         => 'off_t',
311	    pid_t           => 'int',
312	    __size_t        => 'u_long',
313	    size_t          => 'u_long',
314	    'unsigned int'  => 'u_int',
315	    'unsigned long' => 'u_long',
316	);
317
318	$l //= '__undef__';
319	$r //= '__undef__';
320
321	s/\b volatile \s+//x  for $l, $r;
322	s/\b const    \s+//x  for $l, $r;
323	s/\s* \[\d*\] $/ \*/x for $l, $r;
324
325	my ($f, $s) = sort { length($a) <=> length($b) } $l, $r;
326	if (index($s, $f) == 0) {
327		$s =~ s/^\Q$f\E\s*//;
328		if ( $s && $s =~ /^\w+$/ ) {
329			#warn "prefix ['$f', '$s']\n";
330			s/\s*\Q$s\E$// for $l, $r;
331		}
332	}
333
334	$l = $m{$l} //= $l;
335	$r = $m{$r} //= $r;
336
337	return $l eq $r;
338}
339
340
341# Tests whether two function signatures match,
342# expected to be left from syscall.h, right from the appopriate header.
343sub sigs_match($l, $r)
344{
345	return !!0 unless types_match( $l->{ret}, $l->{ret} );
346
347	my @l_args = @{ $l->{args} || [] };
348	my @r_args = @{ $r->{args} || [] };
349
350	for (\@l_args, \@r_args) {
351		@{$_} = 'void' unless @{$_};
352	}
353
354	for my $i ( 0 .. $#l_args ) {
355		return !!0 unless types_match($l_args[$i], $r_args[$i]);
356		last if $l_args[$i] eq '...';
357	}
358
359	return !!1;
360}
361