
/*
* Copyright (c) 2011, 2012, 2013, 2015, 2022, 2023 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* memorymanagement.cpp
* Functions that allow modification of virtual memory.
*/
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sortix/memusage.h>
#include <sortix/mman.h>
#include <sortix/seek.h>
#include <sortix/kernel/copy.h>
#include <sortix/kernel/descriptor.h>
#include <sortix/kernel/ioctx.h>
#include <sortix/kernel/kernel.h>
#include <sortix/kernel/memorymanagement.h>
#include <sortix/kernel/process.h>
#include <sortix/kernel/segment.h>
#include <sortix/kernel/syscall.h>
namespace Sortix {
// TODO: After releasing Sortix 1.1, remove this deprecated system call retained
// for backwards ABI compatibility with Sortix 1.0's xz port.
int sys_memstat(size_t* memused, size_t* memtotal)
{
size_t used;
size_t total;
Memory::Statistics(&used, &total, NULL);
if ( memused && !CopyToUser(memused, &used, sizeof(used)) )
return -1;
if ( memtotal && !CopyToUser(memtotal, &total, sizeof(total)) )
return -1;
return 0;
}
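// Report memory usage statistics to user-space. The caller passes an array of
// statistic identifiers and receives the corresponding values back. Requests
// are processed in chunks of 32 entries so the kernel stack buffer stays small
// no matter how long the user-supplied array is.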
int sys_memusage(const size_t* user_statistics,
size_t* user_values,
size_t length)
{
size_t used;
size_t total;
size_t purposes[PAGE_USAGE_NUM_KINDS];
Memory::Statistics(&used, &total, purposes);
for ( size_t start = 0; start < length; )
{
const size_t BLOCK = 32;
size_t statistics[BLOCK];
size_t amount = length - start;
if ( BLOCK < amount )
amount = BLOCK;
if ( !CopyFromUser(statistics, user_statistics + start,
amount * sizeof(size_t)) )
return -1;
for ( size_t i = 0; i < amount; i++ )
{
size_t statistic = statistics[i];
size_t value;
if ( statistic == MEMUSAGE_TOTAL )
value = total;
else if ( statistic == MEMUSAGE_USED )
value = used;
else if ( MEMUSAGE_PURPOSE_FIRST <= statistic &&
statistic <= MEMUSAGE_PURPOSE_LAST )
value = purposes[statistic - MEMUSAGE_PURPOSE_FIRST];
else
return errno = EINVAL, -1;
statistics[i] = value;
}
if ( !CopyToUser(user_values + start, statistics,
amount * sizeof(size_t)) )
return -1;
start += amount;
}
return 0;
}
} // namespace Sortix
namespace Sortix {
namespace Memory {
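// Remove all mappings in [addr, addr + size) from the process. Overlapping
// segments are found one at a time and handled according to how they overlap
// the requested range: fully covered segments are deleted, a segment spanning
// the whole range is split in two, and segments overlapping only one end are
// shrunk accordingly.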
void UnmapMemory(Process* process, uintptr_t addr, size_t size)
{
// process->segment_write_lock is held.
// process->segment_lock is held.
assert(Page::IsAligned(addr));
assert(Page::IsAligned(size));
assert(process == CurrentProcess());
if ( UINTPTR_MAX - addr < size )
size = Page::AlignDown(UINTPTR_MAX - addr);
if ( !size )
return;
struct segment unmap_segment;
unmap_segment.addr = addr;
unmap_segment.size = size;
unmap_segment.prot = 0;
while ( struct segment* conflict = FindOverlappingSegment(process,
&unmap_segment) )
{
// Delete the segment if covered entirely by our request.
if ( addr <= conflict->addr && conflict->addr + conflict->size <= addr + size )
{
uintptr_t conflict_offset = (uintptr_t) conflict - (uintptr_t) process->segments;
size_t conflict_index = conflict_offset / sizeof(struct segment);
Memory::UnmapRange(conflict->addr, conflict->size, PAGE_USAGE_USER_SPACE);
Memory::Flush();
if ( conflict_index + 1 == process->segments_used )
{
process->segments_used--;
continue;
}
process->segments[conflict_index] = process->segments[--process->segments_used];
qsort(process->segments, process->segments_used,
sizeof(struct segment), segmentcmp);
continue;
}
// Delete the middle of the segment if covered there by our request.
if ( conflict->addr < addr && addr + size - conflict->addr <= conflict->size )
{
Memory::UnmapRange(addr, size, PAGE_USAGE_USER_SPACE);
Memory::Flush();
struct segment right_segment;
right_segment.addr = addr + size;
right_segment.size = conflict->addr + conflict->size - (addr + size);
right_segment.prot = conflict->prot;
conflict->size = addr - conflict->addr;
// TODO: This shouldn't really fail as we free memory above, but
// this code isn't really provably reliable.
if ( !AddSegment(process, &right_segment) )
PanicF("Unexpectedly unable to split memory mapped segment");
continue;
}
// Delete the part of the segment covered partially from the left.
if ( addr <= conflict->addr )
{
Memory::UnmapRange(conflict->addr, addr + size - conflict->addr, PAGE_USAGE_USER_SPACE);
Memory::Flush();
conflict->size = conflict->addr + conflict->size - (addr + size);
conflict->addr = addr + size;
continue;
}
// Delete the part of the segment covered partially from the right.
if ( conflict->addr <= addr + size )
{
Memory::UnmapRange(addr, conflict->addr + conflict->size - addr, PAGE_USAGE_USER_SPACE);
Memory::Flush();
conflict->size -= conflict->addr + conflict->size - addr;
continue;
}
}
}
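// Change the protection of every segment covering [addr, addr + size). Fails
// with EINVAL if the range contains any unmapped gaps.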
bool ProtectMemory(Process* process, uintptr_t addr, size_t size, int prot)
{
// process->segment_write_lock is held.
// process->segment_lock is held.
assert(Page::IsAligned(addr));
assert(Page::IsAligned(size));
assert(process == CurrentProcess());
// First split the segments overlapping with [addr, addr + size) into
// smaller segments that don't cross addr and addr+size, while verifying
// there are no gaps in that region. This is where the operation can fail as
// the AddSegment call can run out of memory. There is no harm in splitting
// the segments into smaller chunks.
for ( size_t offset = 0; offset < size; )
{
struct segment search_region;
search_region.addr = addr + offset;
search_region.size = Page::Size();
search_region.prot = prot;
struct segment* segment = FindOverlappingSegment(process, &search_region);
if ( !segment )
return errno = EINVAL, false;
// Split the segment into two if it begins before our search region.
if ( segment->addr < search_region.addr )
{
struct segment new_segment;
new_segment.addr = search_region.addr;
new_segment.size = segment->addr + segment->size - new_segment.addr;
new_segment.prot = segment->prot;
segment->size = search_region.addr - segment->addr;
if ( !AddSegment(process, &new_segment) )
{
segment->size += new_segment.size;
return false;
}
continue;
}
// Split the segment into two if it ends after addr + size.
if ( size < segment->addr + segment->size - addr )
{
struct segment new_segment;
new_segment.addr = addr + size;
new_segment.size = segment->addr + segment->size - new_segment.addr;
new_segment.prot = segment->prot;
segment->size = addr + size - segment->addr;
if ( !AddSegment(process, &new_segment) )
{
segment->size += new_segment.size;
return false;
}
continue;
}
offset += segment->size;
}
// Run through all the segments in the region [addr, addr+size) and change
// the permissions and update the permissions of the virtual memory itself.
for ( size_t offset = 0; offset < size; )
{
struct segment search_region;
search_region.addr = addr + offset;
search_region.size = Page::Size();
search_region.prot = prot;
struct segment* segment = FindOverlappingSegment(process, &search_region);
assert(segment);
if ( segment->prot != prot )
{
// TODO: There is a moment of inconsistency here when the segment
// table itself has another protection written than what
// applies to the actual pages.
// TODO: SECURITY: Does this have security implications?
segment->prot = prot;
for ( size_t i = 0; i < segment->size; i += Page::Size() )
Memory::PageProtect(segment->addr + i, prot);
Memory::Flush();
}
offset += segment->size;
}
return true;
}
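// Map fresh anonymous memory at [addr, addr + size) with the given protection,
// replacing any existing mappings in that range. The pages are mapped first
// and then registered as a new segment; if the segment bookkeeping fails, the
// freshly mapped range is unmapped again before returning failure.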
bool MapMemory(Process* process, uintptr_t addr, size_t size, int prot)
{
// process->segment_write_lock is held.
// process->segment_lock is held.
assert(Page::IsAligned(addr));
assert(Page::IsAligned(size));
assert(process == CurrentProcess());
UnmapMemory(process, addr, size);
struct segment new_segment;
new_segment.addr = addr;
new_segment.size = size;
new_segment.prot = prot;
if ( !MapRange(new_segment.addr, new_segment.size, new_segment.prot, PAGE_USAGE_USER_SPACE) )
return false;
Memory::Flush();
if ( !AddSegment(process, &new_segment) )
{
UnmapRange(new_segment.addr, new_segment.size, PAGE_USAGE_USER_SPACE);
Memory::Flush();
return false;
}
// We have process->segment_write_lock locked, so we know that the memory in
// user space exists and we can safely zero it here.
// TODO: Another thread is able to see the old contents of the memory before
// we zero it causing potential information leaks.
// TODO: SECURITY: Information leak.
memset((void*) new_segment.addr, 0, new_segment.size);
return true;
}
} // namespace Memory
} // namespace Sortix
namespace Sortix {
const int USER_SETTABLE_PROT = PROT_USER;
const int UNDERSTOOD_MMAP_FLAGS = MAP_SHARED |
MAP_PRIVATE |
MAP_ANONYMOUS |
MAP_FIXED;
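// The mmap system call: validate the arguments, pick or verify the placement,
// map zeroed kernel-writable memory, read in the backing file (if any) with
// the segment lock dropped, and finally switch to the page protection the
// caller asked for.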
static
void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
off_t offset)
{
// Verify that the address is suitably aligned if fixed.
uintptr_t addr = (uintptr_t) addr_ptr;
if ( flags & MAP_FIXED && !Page::IsAligned(addr) )
return errno = EINVAL, MAP_FAILED;
// We don't allow zero-size mappings.
if ( size == 0 )
return errno = EINVAL, MAP_FAILED;
// Verify that the user didn't request permissions not allowed.
if ( prot & ~USER_SETTABLE_PROT )
return errno = EINVAL, MAP_FAILED;
// Verify that we understand all the flags we were passed.
if ( flags & ~UNDERSTOOD_MMAP_FLAGS )
return errno = EINVAL, MAP_FAILED;
// Verify that MAP_PRIVATE and MAP_SHARED are not both set.
if ( bool(flags & MAP_PRIVATE) == bool(flags & MAP_SHARED) )
return errno = EINVAL, MAP_FAILED;
// TODO: MAP_SHARED is not currently supported.
if ( flags & MAP_SHARED )
return errno = EINVAL, MAP_FAILED;
// Verify that the file descriptor and the offset are suitably set if needed.
if ( !(flags & MAP_ANONYMOUS) &&
(fd < 0 || offset < 0 || (offset & (Page::Size()-1))) )
return errno = EINVAL, MAP_FAILED;
uintptr_t aligned_addr = Page::AlignDown(addr);
uintptr_t aligned_size = Page::AlignUp(size);
// Pick a good location near the end of user-space if no hint is given.
if ( !(flags & MAP_FIXED) && !aligned_addr )
{
uintptr_t userspace_addr;
size_t userspace_size;
Memory::GetUserVirtualArea(&userspace_addr, &userspace_size);
addr = aligned_addr =
Page::AlignDown(userspace_addr + userspace_size - aligned_size);
}
// Verify that the offset + size doesn't overflow.
if ( !(flags & MAP_ANONYMOUS) &&
(uintmax_t) (OFF_MAX - offset) < (uintmax_t) aligned_size )
return errno = EOVERFLOW, MAP_FAILED;
Process* process = CurrentProcess();
// Verify whether the backing file is usable for memory mapping.
ioctx_t ctx; SetupUserIOCtx(&ctx);
Ref<Descriptor> desc;
if ( !(flags & MAP_ANONYMOUS) )
{
if ( !(desc = process->GetDescriptor(fd)) )
return MAP_FAILED;
// Verify that the file is seekable.
if ( S_ISCHR(desc->type) || desc->lseek(&ctx, 0, SEEK_END) < 0 )
return errno = ENODEV, MAP_FAILED;
// Verify that we have read access to the file.
if ( desc->read(&ctx, NULL, 0) != 0 )
return errno = EACCES, MAP_FAILED;
// Verify that we have write access to the file if needed.
if ( (prot & PROT_WRITE) && !(flags & MAP_PRIVATE) &&
desc->write(&ctx, NULL, 0) != 0 )
return errno = EACCES, MAP_FAILED;
}
ScopedLock lock1(&process->segment_write_lock);
ScopedLock lock2(&process->segment_lock);
// Determine where to put the new segment and its protection.
struct segment new_segment;
if ( flags & MAP_FIXED )
new_segment.addr = aligned_addr,
new_segment.size = aligned_size;
else if ( !PlaceSegment(&new_segment, process, (void*) addr, aligned_size, flags) )
return errno = ENOMEM, MAP_FAILED;
new_segment.prot = PROT_KWRITE | PROT_FORK;
// Allocate a memory segment with the desired properties.
if ( !Memory::MapMemory(process, new_segment.addr, new_segment.size, new_segment.prot) )
return MAP_FAILED;
// The pread below will copy to user-space, which requires this lock to be
// free.
lock2.Reset();
// Read the file contents into the newly allocated memory.
if ( !(flags & MAP_ANONYMOUS) )
{
ioctx_t kctx; SetupKernelIOCtx(&kctx);
for ( size_t so_far = 0; so_far < aligned_size; )
{
uint8_t* ptr = (uint8_t*) (new_segment.addr + so_far);
size_t left = aligned_size - so_far;
off_t pos = offset + so_far;
ssize_t num_bytes = desc->pread(&kctx, ptr, left, pos);
if ( num_bytes < 0 )
{
// TODO: How should this situation be handled? For now we'll
// just ignore the error condition.
errno = 0;
break;
}
if ( !num_bytes )
{
// We got an unexpected early end-of-file condition, but that's
// alright as the MapMemory call zero'd the new memory and we
// are expected to zero the remainder.
break;
}
so_far += num_bytes;
}
}
// Finally switch to the desired page protections.
kthread_mutex_lock(&process->segment_lock);
if ( prot & PROT_READ )
prot |= PROT_KREAD;
if ( prot & PROT_WRITE )
prot |= PROT_KWRITE;
prot |= PROT_FORK;
Memory::ProtectMemory(CurrentProcess(), new_segment.addr, new_segment.size, prot);
kthread_mutex_unlock(&process->segment_lock);
lock1.Reset();
return (void*) new_segment.addr;
}
int sys_mprotect(const void* addr_ptr, size_t size, int prot)
{
// Verify that the address is suitably aligned.
uintptr_t addr = (uintptr_t) addr_ptr;
if ( !Page::IsAligned(addr) )
return errno = EINVAL, -1;
// Verify that the user didn't request permissions not allowed.
if ( prot & ~USER_SETTABLE_PROT )
return errno = EINVAL, -1;
size = Page::AlignUp(size);
prot |= PROT_KREAD | PROT_KWRITE | PROT_FORK;
Process* process = CurrentProcess();
ScopedLock lock1(&process->segment_write_lock);
ScopedLock lock2(&process->segment_lock);
if ( !Memory::ProtectMemory(process, addr, size, prot) )
return -1;
return 0;
}
int sys_munmap(void* addr_ptr, size_t size)
{
// Verify that the address is suitably aligned.
uintptr_t addr = (uintptr_t) addr_ptr;
if ( !Page::IsAligned(addr) )
return errno = EINVAL, -1;
// We don't allow zero-size unmappings.
if ( size == 0 )
return errno = EINVAL, -1;
size = Page::AlignUp(size);
Process* process = CurrentProcess();
ScopedLock lock1(&process->segment_write_lock);
ScopedLock lock2(&process->segment_lock);
Memory::UnmapMemory(process, addr, size);
return 0;
}
// TODO: We use a wrapper system call here because there are too many parameters
// to mmap for some platforms. We should extend the system call ABI so we
// can do system calls with huge parameter lists and huge return values
// portably - then we'll make sys_mmap use this mechanism if needed.
struct mmap_request /* duplicated in libc/sys/mman/mmap.cpp */
{
void* addr;
size_t size;
int prot;
int flags;
int fd;
off_t offset;
};
void* sys_mmap_wrapper(struct mmap_request* user_request)
{
struct mmap_request request;
if ( !CopyFromUser(&request, user_request, sizeof(request)) )
return MAP_FAILED;
return sys_mmap(request.addr, request.size, request.prot, request.flags,
request.fd, request.offset);
}
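// Illustrative sketch only (the real stub lives in libc/sys/mman/mmap.cpp and
// may differ): the user-space mmap(3) wrapper is expected to pack its six
// arguments into a struct mmap_request on its own stack and pass a single
// pointer through the system call, roughly:
//
//     struct mmap_request request = { addr, size, prot, flags, fd, offset };
//     return sys_mmap_wrapper_trap(&request); // hypothetical stub name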
} // namespace Sortix