QEMU Memory Allocation Issue

I seem to have encountered an issue with qemu memory allocation.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
INTERNAL void *
x64_syscall_mmap(void *base_addr, u64 size, u32 memory_protection, 
                u32 mapping_visibility, s32 fd, u64 fd_offset)
{
  s32 result = 0;
  __asm__ __volatile__("mov r10, %5\n"
             "mov r8, %6\n"
             "mov r9, %7\n"
             "syscall"
              : "=a" (result)
              : "a" (9), 
                "D" ((u64)base_addr),
                "S" (size),
                "d" ((u64)memory_protection),
                "r" ((u64)mapping_visibility),
                "r" ((u64)fd),
                "r" (fd_offset)
              : "r10", "r8", "r9", "r11", "rcx", "memory");

  void *sys_result = (void *)((u64)result);
  if ((u64)result >= (u64)(-MAX_ERRNO)) {
    breakpoint();
    sys_result = NULL;
  }

  return sys_result;
}


typedef struct {
  void* base;
  u64 size;
  u64 used;
} X64MemArena;

GLOBAL X64MemArena platform_mem_arena = {0};

INTERNAL s32
x64_mem_arena_init(X64MemArena *mem_arena, u64 size)
{
  mem_arena->base = x64_syscall_mmap(NULL, size, PROT_READ | PROT_WRITE,
          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem_arena->base == NULL) {
    breakpoint();
  }
  // I tried without this call, gives same result
  if (x64_syscall_mlock(mem_arena->base, mem_arena->size) < 0) {
    breakpoint();
  }

  mem_arena->used = 0;
  mem_arena->size = size;

  return 0;
}
// ...
// used here 
if (x64_mem_arena_init(&platform_mem_arena, 1024 * 1024 * 200) == -1) {
  breakpoint();
}

Run with
qemu-system-x86_64 -enable-kvm -m 512M -s -S -drive format=raw,file=ker.img -kernel /boot/vmlinuz-5.8.0-50-generic -append "root=/dev/sda init=/sbin/x64-ker nokaslr"

When I inspect the contents of (u8 *)platform_mem_arena.base in debugger I get 0xd6ad000 <error: Cannot access memory at address 0xd6ad000>.
In the qemu window I get a line: x86/mm: Checked W+X mappings: passed, no W+X pages found. Could this be something to do with it?

Edited by Ryan on
Your result variable is 32-bit int. Syscall returns result in rax register which is 64-bit. It probably has some upper 32-bits set there, which you don't store.

Edited by Mārtiņš Možeiko on
I changed result to s64 result = 0; but I still get the same invalid memory.
How exactly are you compiling that code? Because that inline assembly seems to use intel syntax? But by default that is not supported. In AT&T syntax registers are prefixed with % like %r10 and the order of operands are opposite (mov src, dst).

Does your x64_syscall_mmap function runs fine when you execute on your host Linux, without any qemu?

I usually write syscall inline asm like this:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
long my_syscall(int nr, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5)
{
    register long r10 __asm__ ("r10") = arg3;
    register long r8 __asm__ ("r8")   = arg4;
    register long r9 __asm__ ("r9")   = arg5;
    long result;
    __asm__ __volatile__(
        "syscall"
        : "=r" (result)
        : "0"(nr), "D"(arg0), "S"(arg1), "d"(arg2), "r"(r10), "r"(r8), "r"(r9)
        : "rcx", "r11", "memory"
    );
    return result;
}

Remove extra args if syscall needs less.

Edited by Mārtiņš Možeiko on
I compile with -masm=intel

Running on host it works fine. Memory is valid and zeroed.

I based my syscalls on this https://github.com/linux-on-ibm-z/linux-syscall-support

The reason I use qemu at all (it's a massive pain for me to debug) is because I want to use DRM/KMS for outputting graphics. Only DRM Master is allowed to do modesetting and there can only be one DRM Master. On host, X is already master so I can't use it (I want to have X to use a graphical debugger)

Edited by Ryan on
Have you tried putting breakpoint on "syscall" instruction and verifying input register values if they are what you expect? and then step over it, and check rax register?
Yes, it all seems fine. I think I have ruled out x86/mm: Checked W+X mappings: passed, no W+X pages found. being an issue: https://en.wikipedia.org/wiki/W%5EX