Add file descriptor table reservations.

The file descriptor table now allows reserving room for multiple file
descriptors without assigning their numbers. This functionality means
any error conditions happen up front and the subsequent number
assignment will never fail.

This change uses the new functionality to fix troublesome error handling
when allocating multiple file descriptors. One pty allocation error path
was even wrong.

There were subtle race conditions where one (kernel) thread may have
allocated one file descriptor, and another thread spuriously replaces it
with something else, and then the second file descriptor allocation
fails in the first thread, and it closes the first file descriptor now
pointing to a different file description. This case seems harmless but
it's not a great class of bugs to exist in the first place. The new
behavior means the file descriptions appear in the file descriptor table
without fail, never need to be cleaned up midway, and are certainly
immune to shenanigans from other threads.

Reviewed-by: Pedro Falcato <pedro.falcato@gmail.com>
This commit is contained in:
Jonas 'Sortie' Termansen 2021-12-18 00:05:46 +01:00
parent 42f6a359d1
commit b9898086c6
5 changed files with 164 additions and 124 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. * Copyright (c) 2011-2016, 2021 Jonas 'Sortie' Termansen.
* *
* Permission to use, copy, modify, and distribute this software for any * Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above * purpose with or without fee is hereby granted, provided that the above
@ -33,22 +33,33 @@
namespace Sortix { namespace Sortix {
struct DescriptorEntry
{
Ref<Descriptor> desc;
int flags;
};
DescriptorTable::DescriptorTable() DescriptorTable::DescriptorTable()
{ {
dtablelock = KTHREAD_MUTEX_INITIALIZER; dtablelock = KTHREAD_MUTEX_INITIALIZER;
entries = NULL; entries = NULL;
numentries = 0; entries_used = 0;
entries_length = 0;
reserved_count = 0;
first_not_taken = 0; first_not_taken = 0;
} }
DescriptorTable::~DescriptorTable() DescriptorTable::~DescriptorTable()
{ {
Reset(); for ( int i = 0; i < entries_length; i++ )
if ( entries[i].desc )
entries[i].desc.Reset();
delete[] entries;
} }
bool DescriptorTable::IsGoodEntry(int i) bool DescriptorTable::IsGoodEntry(int i) // dtablelock locked
{ {
return 0 <= i && i < numentries && entries[i].desc; return 0 <= i && i < entries_length && entries[i].desc;
} }
Ref<DescriptorTable> DescriptorTable::Fork() Ref<DescriptorTable> DescriptorTable::Fork()
@ -57,12 +68,12 @@ Ref<DescriptorTable> DescriptorTable::Fork()
Ref<DescriptorTable> ret(new DescriptorTable); Ref<DescriptorTable> ret(new DescriptorTable);
if ( !ret ) if ( !ret )
return Ref<DescriptorTable>(NULL); return Ref<DescriptorTable>(NULL);
ret->entries = new dtableent_t[numentries]; ret->entries = new DescriptorEntry[entries_length];
if ( !ret->entries ) if ( !ret->entries )
return Ref<DescriptorTable>(NULL); return Ref<DescriptorTable>(NULL);
ret->first_not_taken = 0; // Copy all the file descriptors except ones closed on fork.
ret->numentries = numentries; ret->entries_length = entries_length;
for ( int i = 0; i < numentries; i++ ) for ( int i = 0; i < entries_length; i++ )
{ {
if ( !entries[i].desc || entries[i].flags & FD_CLOFORK ) if ( !entries[i].desc || entries[i].flags & FD_CLOFORK )
{ {
@ -71,6 +82,7 @@ Ref<DescriptorTable> DescriptorTable::Fork()
continue; continue;
} }
ret->entries[i] = entries[i]; ret->entries[i] = entries[i];
ret->entries_used++;
if ( ret->first_not_taken == i ) if ( ret->first_not_taken == i )
ret->first_not_taken = i + 1; ret->first_not_taken = i + 1;
} }
@ -85,79 +97,133 @@ Ref<Descriptor> DescriptorTable::Get(int index)
return entries[index].desc; return entries[index].desc;
} }
bool DescriptorTable::Enlargen(int atleast) // Expands the table to have at least need_entries entries and at least
// need_unused unused entries.
bool DescriptorTable::Enlargen(int need_entries,
int need_unused) // dtablelock taken
{ {
if ( numentries == INT_MAX ) // Figure out how many entries are needed to satisfy the need for unused
return errno = EMFILE, false; // Cannot enlargen any more. // entries and grow the entries to that size if larger than need_entries.
int newnumentries = 8; if ( __builtin_add_overflow(need_unused, entries_used, &need_unused) ||
if ( numentries && __builtin_mul_overflow(2, numentries, &newnumentries) ) __builtin_add_overflow(need_unused, reserved_count, &need_unused) )
newnumentries = INT_MAX; return errno = EMFILE, false;
if ( newnumentries < atleast ) if ( need_entries < need_unused )
newnumentries = atleast; need_entries = need_unused;
dtableent_t* newentries = new dtableent_t[newnumentries]; if ( need_entries <= entries_length )
if ( !newentries ) return true;
if ( entries_length == INT_MAX )
return errno = EMFILE, false;
// At least double the size of the table but maybe more entries are needed.
int new_entries_length = 8;
if ( entries_length &&
__builtin_mul_overflow(2, entries_length, &new_entries_length) )
new_entries_length = INT_MAX;
if ( new_entries_length < need_entries )
new_entries_length = need_entries;
DescriptorEntry* new_entries = new DescriptorEntry[new_entries_length];
if ( !new_entries )
return false; return false;
for ( int i = 0; i < numentries; i++ ) for ( int i = 0; i < entries_length; i++ )
{ {
newentries[i] = entries[i]; new_entries[i] = entries[i];
entries[i].desc.Reset(); entries[i].desc.Reset();
} }
for ( int i = numentries; i < newnumentries; i++ ) for ( int i = entries_length; i < new_entries_length; i++ )
{ {
//newentries[i].desc = NULL; // Constructor did this already. //new_entries[i].desc = NULL; // Constructor did this already.
newentries[i].flags = 0; new_entries[i].flags = 0;
} }
delete[] entries; delete[] entries;
entries = newentries; entries = new_entries;
numentries = newnumentries; entries_length = new_entries_length;
return true; return true;
} }
bool DescriptorTable::Reserve(int count, int* reservation)
{
assert(0 <= count);
if ( !Enlargen(0, count) )
return false;
assert(reserved_count <= entries_length - entries_used);
assert(reserved_count + count <= entries_length - entries_used);
reserved_count += count;
*reservation = count;
return true;
}
void DescriptorTable::Unreserve(int* reservation)
{
assert(0 <= *reservation);
assert(*reservation <= reserved_count);
reserved_count -= *reservation;
*reservation = 0;
}
int DescriptorTable::AllocateInternal(Ref<Descriptor> desc, int DescriptorTable::AllocateInternal(Ref<Descriptor> desc,
int flags, int flags,
int min_index) int min_index,
int* reservation) // dtablelock locked
{ {
// dtablelock is held. assert(!reservation || 1 <= *reservation);
assert(!reservation || !min_index);
if ( flags & ~__FD_ALLOWED_FLAGS ) if ( flags & ~__FD_ALLOWED_FLAGS )
return errno = EINVAL, -1; return errno = EINVAL, -1;
if ( min_index < 0 ) if ( min_index < 0 )
return errno = EINVAL, -1; return errno = EINVAL, -1;
if ( min_index < first_not_taken ) if ( min_index < first_not_taken )
min_index = first_not_taken; min_index = first_not_taken;
for ( int i = min_index; i < numentries; i++ ) int first_available = min_index;
for ( int i = min_index; i < entries_length; i++ )
{ {
if ( entries[i].desc ) if ( entries[i].desc )
{
if ( first_available == i )
first_available = i + 1;
if ( first_not_taken == i )
first_not_taken = i + 1;
continue; continue;
}
if ( !reservation && entries_length - reserved_count <= i )
break;
entries[i].desc = desc; entries[i].desc = desc;
entries[i].flags = flags; entries[i].flags = flags;
entries_used++;
if ( reservation )
{
assert(reserved_count);
(*reservation)--;
reserved_count--;
}
if ( first_not_taken == i ) if ( first_not_taken == i )
first_not_taken = i + 1; first_not_taken = i + 1;
return i; return i;
} }
first_not_taken = numentries; assert(!reservation);
int oldnumentries = numentries; if ( !Enlargen(first_available + 1, 1) )
if ( !Enlargen(min_index) )
return -1; return -1;
int i = oldnumentries; entries[first_available].desc = desc;
entries[i].desc = desc; entries[first_available].flags = flags;
entries[i].flags = flags; entries_used++;
if ( first_not_taken == i ) if ( first_not_taken == first_available )
first_not_taken = i + 1; first_not_taken = first_available + 1;
return i; return first_available;
} }
int DescriptorTable::Allocate(Ref<Descriptor> desc, int flags, int min_index) int DescriptorTable::Allocate(Ref<Descriptor> desc, int flags, int min_index,
int* reservation)
{ {
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
return AllocateInternal(desc, flags, min_index); return AllocateInternal(desc, flags, min_index, reservation);
} }
int DescriptorTable::Allocate(int src_index, int flags, int min_index) int DescriptorTable::Allocate(int src_index, int flags, int min_index,
int* reservation)
{ {
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
if ( !IsGoodEntry(src_index) ) if ( !IsGoodEntry(src_index) )
return errno = EBADF, -1; return errno = EBADF, -1;
return AllocateInternal(entries[src_index].desc, flags, min_index); return AllocateInternal(entries[src_index].desc, flags, min_index,
reservation);
} }
int DescriptorTable::Copy(int from, int to, int flags) int DescriptorTable::Copy(int from, int to, int flags)
@ -167,25 +233,18 @@ int DescriptorTable::Copy(int from, int to, int flags)
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
if ( from < 0 || to < 0 ) if ( from < 0 || to < 0 )
return errno = EINVAL, -1; return errno = EINVAL, -1;
if ( !(from < numentries) ) if ( !IsGoodEntry(from) )
return errno = EBADF, -1;
if ( !entries[from].desc )
return errno = EBADF, -1; return errno = EBADF, -1;
if ( from == to ) if ( from == to )
return errno = EINVAL, -1; return errno = EINVAL, -1;
while ( !(to < numentries) ) if ( to == INT_MAX )
{ return errno = EBADF, -1;
if ( to == INT_MAX ) if ( !IsGoodEntry(to) && !Enlargen(to + 1, 1) )
return errno = EBADF, -1; return -1;
if ( !Enlargen(to + 1) )
return -1;
}
if ( entries[to].desc != entries[from].desc ) if ( entries[to].desc != entries[from].desc )
{ {
if ( entries[to].desc ) if ( !entries[to].desc )
{ entries_used++;
// TODO: Should this be synced or otherwise properly closed?
}
entries[to].desc = entries[from].desc; entries[to].desc = entries[from].desc;
} }
entries[to].flags = flags; entries[to].flags = flags;
@ -195,12 +254,14 @@ int DescriptorTable::Copy(int from, int to, int flags)
} }
Ref<Descriptor> DescriptorTable::FreeKeepInternal(int index) Ref<Descriptor> DescriptorTable::FreeKeepInternal(int index)
// dtablelock locked
{ {
if ( !IsGoodEntry(index) ) if ( !IsGoodEntry(index) )
return errno = EBADF, Ref<Descriptor>(NULL); return errno = EBADF, Ref<Descriptor>(NULL);
Ref<Descriptor> ret = entries[index].desc; Ref<Descriptor> ret = entries[index].desc;
entries[index].desc.Reset(); entries[index].desc.Reset();
entries[index].flags = 0; entries[index].flags = 0;
entries_used--;
if ( index < first_not_taken ) if ( index < first_not_taken )
first_not_taken = index; first_not_taken = index;
return ret; return ret;
@ -220,7 +281,7 @@ void DescriptorTable::Free(int index)
void DescriptorTable::OnExecute() void DescriptorTable::OnExecute()
{ {
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
for ( int i = 0; i < numentries; i++ ) for ( int i = 0; i < entries_length; i++ )
{ {
if ( !entries[i].desc ) if ( !entries[i].desc )
continue; continue;
@ -229,21 +290,10 @@ void DescriptorTable::OnExecute()
entries[i].desc.Reset(); entries[i].desc.Reset();
if ( i < first_not_taken ) if ( i < first_not_taken )
first_not_taken = i; first_not_taken = i;
entries_used--;
} }
} }
void DescriptorTable::Reset()
{
ScopedLock lock(&dtablelock);
for ( int i = 0; i < numentries; i++ )
if ( entries[i].desc )
entries[i].desc.Reset();
numentries = 0;
delete[] entries;
entries = NULL;
first_not_taken = 0;
}
bool DescriptorTable::SetFlags(int index, int flags) bool DescriptorTable::SetFlags(int index, int flags)
{ {
if ( flags & ~__FD_ALLOWED_FLAGS ) if ( flags & ~__FD_ALLOWED_FLAGS )
@ -267,7 +317,7 @@ int DescriptorTable::Previous(int index)
{ {
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
if ( index < 0 ) if ( index < 0 )
index = numentries; index = entries_length;
do index--; do index--;
while ( 0 <= index && !IsGoodEntry(index) ); while ( 0 <= index && !IsGoodEntry(index) );
if ( index < 0 ) if ( index < 0 )
@ -280,11 +330,11 @@ int DescriptorTable::Next(int index)
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
if ( index < 0 ) if ( index < 0 )
index = -1; index = -1;
if ( numentries <= index ) if ( entries_length <= index )
return errno = EBADF, -1; return errno = EBADF, -1;
do index++; do index++;
while ( index < numentries && !IsGoodEntry(index) ); while ( index < entries_length && !IsGoodEntry(index) );
if ( numentries <= index ) if ( entries_length <= index )
return errno = EBADF, -1; return errno = EBADF, -1;
return index; return index;
} }
@ -295,7 +345,7 @@ int DescriptorTable::CloseFrom(int index)
return errno = EBADF, -1; return errno = EBADF, -1;
ScopedLock lock(&dtablelock); ScopedLock lock(&dtablelock);
bool any = false; bool any = false;
for ( ; index < numentries; index++ ) for ( ; index < entries_length; index++ )
{ {
if ( !IsGoodEntry(index) ) if ( !IsGoodEntry(index) )
continue; continue;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, 2012, 2013, 2014, 2015 Jonas 'Sortie' Termansen. * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2021 Jonas 'Sortie' Termansen.
* *
* Permission to use, copy, modify, and distribute this software for any * Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above * purpose with or without fee is hereby granted, provided that the above
@ -25,12 +25,7 @@
namespace Sortix { namespace Sortix {
class Descriptor; class Descriptor;
struct DescriptorEntry;
typedef struct dtableent_struct
{
Ref<Descriptor> desc;
int flags;
} dtableent_t;
class DescriptorTable : public Refcountable class DescriptorTable : public Refcountable
{ {
@ -39,8 +34,12 @@ public:
virtual ~DescriptorTable(); virtual ~DescriptorTable();
Ref<DescriptorTable> Fork(); Ref<DescriptorTable> Fork();
Ref<Descriptor> Get(int index); Ref<Descriptor> Get(int index);
int Allocate(Ref<Descriptor> desc, int flags, int min_index = 0); bool Reserve(int count, int* reservation);
int Allocate(int src_index, int flags, int min_index = 0); void Unreserve(int* reservation);
int Allocate(Ref<Descriptor> desc, int flags, int min_index = 0,
int* reservation = NULL);
int Allocate(int src_index, int flags, int min_index = 0,
int* reservation = NULL);
int Copy(int from, int to, int flags); int Copy(int from, int to, int flags);
void Free(int index); void Free(int index);
Ref<Descriptor> FreeKeep(int index); Ref<Descriptor> FreeKeep(int index);
@ -52,16 +51,18 @@ public:
int CloseFrom(int index); int CloseFrom(int index);
private: private:
void Reset(); // Hey, reference counted. Don't call this.
bool IsGoodEntry(int i); bool IsGoodEntry(int i);
bool Enlargen(int atleast); bool Enlargen(int need_index, int need_count);
int AllocateInternal(Ref<Descriptor> desc, int flags, int min_index); int AllocateInternal(Ref<Descriptor> desc, int flags, int min_index,
int* reservation);
Ref<Descriptor> FreeKeepInternal(int index); Ref<Descriptor> FreeKeepInternal(int index);
private: private:
kthread_mutex_t dtablelock; kthread_mutex_t dtablelock;
dtableent_t* entries; struct DescriptorEntry* entries;
int numentries; int entries_used;
int entries_length;
int reserved_count;
int first_not_taken; int first_not_taken;
}; };

View File

@ -1005,20 +1005,16 @@ int sys_fsm_mountat(int dirfd, const char* path, const struct stat* rootst, int
Ref<Descriptor> from = PrepareLookup(pathcopy, dirfd); Ref<Descriptor> from = PrepareLookup(pathcopy, dirfd);
if ( !from ) if ( !from )
return delete[] pathcopy, -1; return delete[] pathcopy, -1;
Ref<DescriptorTable> dtable = CurrentProcess()->GetDTable();
int reservation;
if ( !dtable->Reserve(1, &reservation) )
return delete[] pathcopy, -1;
Ref<Descriptor> desc = from->fsm_mount(&ctx, pathcopy, rootst, flags); Ref<Descriptor> desc = from->fsm_mount(&ctx, pathcopy, rootst, flags);
delete[] pathcopy; delete[] pathcopy;
if ( !desc ) if ( !desc )
return -1; return dtable->Unreserve(&reservation), -1;
Ref<DescriptorTable> dtable = CurrentProcess()->GetDTable(); int ret = dtable->Allocate(desc, fdflags, 0, &reservation);
int ret = dtable->Allocate(desc, fdflags); assert(0 <= ret);
if ( ret < 0 )
{
// TODO: We should use a fail-safe dtable reservation mechanism that
// causes this error earlier before we have side effects.
int errnum = errno;
from->unmount(&ctx, pathcopy, 0);
return errno = errnum, -1;
}
return ret; return ret;
} }

View File

@ -767,22 +767,18 @@ int sys_pipe2(int* pipefd, int flags)
if ( !recv_desc || !send_desc ) return -1; if ( !recv_desc || !send_desc ) return -1;
Ref<DescriptorTable> dtable = process->GetDTable(); Ref<DescriptorTable> dtable = process->GetDTable();
int reservation;
if ( !dtable->Reserve(2, &reservation) )
return -1;
int recv_index = dtable->Allocate(recv_desc, fdflags, 0, &reservation);
int send_index = dtable->Allocate(send_desc, fdflags, 0, &reservation);
assert(0 <= recv_index);
assert(0 <= send_index);
int ret[2] = { recv_index, send_index };
if ( !CopyToUser(pipefd, ret, sizeof(ret)) )
return -1;
int recv_index, send_index; return 0;
if ( 0 <= (recv_index = dtable->Allocate(recv_desc, fdflags)) )
{
if ( 0 <= (send_index = dtable->Allocate(send_desc, fdflags)) )
{
int ret[2] = { recv_index, send_index };
if ( CopyToUser(pipefd, ret, sizeof(ret)) )
return 0;
dtable->Free(send_index);
}
dtable->Free(recv_index);
}
return -1;
} }
} // namespace Sortix } // namespace Sortix

View File

@ -809,18 +809,15 @@ int sys_mkpty(int* master_fd_user, int* slave_fd_user, int flags)
return -1; return -1;
Ref<DescriptorTable> dtable = process->GetDTable(); Ref<DescriptorTable> dtable = process->GetDTable();
int master_fd = dtable->Allocate(master_desc, fdflags); int reservation = 0;
int slave_fd = dtable->Allocate(slave_desc, fdflags); if ( !dtable->Reserve(2, &reservation) )
return -1;
int master_fd = dtable->Allocate(master_desc, fdflags, 0, &reservation);
int slave_fd = dtable->Allocate(slave_desc, fdflags, 0, &reservation);
assert(0 <= master_fd);
assert(0 <= slave_fd);
master_desc.Reset(); master_desc.Reset();
slave_desc.Reset(); slave_desc.Reset();
if ( master_fd < 0 || slave_fd < 0 )
{
if ( 0 < master_fd )
dtable->Free(master_fd);
if ( 0 < master_fd )
dtable->Free(slave_fd);
return -1;
}
dtable.Reset(); dtable.Reset();
if ( !CopyToUser(master_fd_user, &master_fd, sizeof(int)) || if ( !CopyToUser(master_fd_user, &master_fd, sizeof(int)) ||