Add glob(3).

Switch sh(1) to use glob(3).

Remove compatibility for no glob(3) from the ports.
This commit is contained in:
Jonas 'Sortie' Termansen 2023-03-11 23:16:54 +01:00
parent 4e0ed30861
commit 47e1cc439a
8 changed files with 652 additions and 368 deletions

View File

@ -367,6 +367,8 @@ fstab/scanfsent.o \
fstab/setfsent.o \
getopt/getopt_long.o \
getopt/getopt.o \
glob/glob.o \
glob/globfree.o \
grp/endgrent.o \
grp/fgetgrent.o \
grp/fgetgrent_r.o \

474
libc/glob/glob.c Normal file
View File

@ -0,0 +1,474 @@
/*
* Copyright (c) 2023 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* glob/glob.c
* Search for paths matching a pattern.
*/
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <glob.h>
#include <regex.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * qsort(3) comparator for an array of strings: each argument points at an
 * array element (a char pointer), and the pointed-to strings are ordered
 * according to the current locale's collation rules.
 */
static int strcoll_indirect(const void* a_ptr, const void* b_ptr)
{
	const char* const* lhs = a_ptr;
	const char* const* rhs = b_ptr;
	return strcoll(*lhs, *rhs);
}
// One step of the pattern as parsed by glob: either a run of fixed path
// components (trivial) or a single path component containing wildcards.
// The array of segments doubles as the search stack during the directory walk.
struct segment
{
// Path built so far that leads to the directory this segment searches, or
// NULL for the first segment. Owned by the segment and freed when popped.
char* prefix;
// Number of bytes of prefix in use, not counting the terminating nul.
size_t prefix_length;
// Directory stream this segment resolves names in and, when non-trivial,
// reads entries from. NULL means the current working directory.
DIR* dir;
// True if the segment has no wildcards and names a single fixed path.
bool trivial;
// True if the segment's pattern starts with an explicit period, which is the
// only case in which directory entries starting with a period are considered.
bool leading_period;
// True if the segment's pattern ends in a slash, so only directories match.
bool match_directory;
// True once the segment has yielded everything it can for the current prefix.
bool done;
union
{
// Trivial segments: the byte range of the pattern to use as the path.
struct
{
size_t start;
size_t length;
};
// Non-trivial segments: the compiled extended regular expression that
// directory entry names are matched against.
regex_t regex;
};
};
int glob(const char* restrict pattern,
int flags,
int (*errfunc)(const char*, int),
glob_t* restrict gl)
{
if ( !(flags & GLOB_DOOFFS) )
gl->gl_offs = 0;
if ( !(flags & GLOB_APPEND) )
{
gl->gl_pathv = NULL;
gl->gl_pathc = 0;
}
if ( gl->gl_offs == SIZE_MAX )
return GLOB_NOSPACE;
size_t initial_pathc = gl->gl_pathc;
// Reserve room for at least one string and the trailing null to prevent
// the possibly of late errors in the GLOB_NOCHECK case.
size_t pathl;
if ( __builtin_add_overflow(gl->gl_offs, gl->gl_pathc, &pathl) ||
__builtin_add_overflow(pathl, 2, &pathl) )
return GLOB_NOSPACE;
char** new_pathv = reallocarray(gl->gl_pathv, pathl, sizeof(char*));
if ( !new_pathv )
return GLOB_NOSPACE;
gl->gl_pathv = new_pathv;
size_t paths_length = gl->gl_pathc + 1;
// Parse the pattern into segments where trivial segments are fixed path
// components that can be directly opened and non-trivial segments require
// searching a directory for entries that match the pattern.
struct segment* segments = NULL;
size_t segments_count = 0;
size_t segments_length = 0;
int result = 0;
for ( size_t offset = 0; pattern[offset] && !result; )
{
// Combine multiple trivial path components into a trivial segment, but
// each non-trivial path component must be its own segment.
size_t segment_length = 0;
bool is_trivial = true;
for ( size_t i = 0; pattern[offset + i]; i++ )
{
if ( pattern[offset + i] == '*' ||
pattern[offset + i] == '?' ||
pattern[offset + i] == '[' )
{
if ( segment_length )
break;
is_trivial = false;
}
if ( pattern[offset + i] == '/' || !pattern[offset + i + 1] )
{
segment_length = i + 1;
if ( !is_trivial )
break;
}
}
// Grow the list of segments as needed.
if ( segments_count == segments_length )
{
size_t old_length = segments_length ? segments_length : 1;
struct segment* new_segments =
reallocarray(segments, old_length, 2 * sizeof(struct segment));
if ( !new_segments )
{
result = GLOB_NOSPACE;
break;
}
segments = new_segments;
segments_length = 2 * old_length;
}
struct segment* segment = &segments[segments_count++];
segment->match_directory = pattern[offset + segment_length - 1] == '/';
// Trivial segments just contain the pattern indices to directly open.
if ( (segment->trivial = is_trivial) )
{
segment->start = offset;
segment->length = segment_length;
}
// Non-trivial segments are translated to a regular expression that is
// compiled right now so it can be efficiently reused during the search.
else
{
// Match a leading period only if the pattern explicitly starts with
// a period. POSIX requires that leading periods aren't matched by
// the * and ? and [ operators, but also are not matched by negated
// patterns like [^a]. It's unspecified whether [.] would match a
// leading period. Although regular expressions can express such
// patterns, it's difficult to translate, and it's much easier to
// just special case the leading period like this.
segment->leading_period = pattern[offset] == '.';
char* re = NULL;
size_t re_size;
FILE* fp = open_memstream(&re, &re_size);
bool escaped = false;
fputc('^', fp);
// Translate the pattern to an extended regular expression.
for ( size_t i = 0; fp && i < segment_length; i++ )
{
unsigned char c = pattern[offset + i];
if ( !escaped && c == '*' )
fputs(".*", fp);
else if ( !escaped && c == '?' )
fputs(".", fp);
else if ( !escaped && c == '[' )
{
// The whole character range is passed directly to regcomp
// so the correct end just has to be found, taking the edge
// cases into account. POSIX requires using ! instead of ^
// for character range negations. As an extension, ^ is
// also just passed directly to regcomp and works.
const char* expr = pattern + offset + i;
size_t max = segment_length - i;
size_t len = 1;
if ( len < max && (expr[len] == '!' || expr[len] == '^') )
len++;
if ( len < max && expr[len] == ']' )
len++;
while ( len < max && expr[len] != ']' )
{
if ( 2 <= max - len && expr[len] == '[' &&
(expr[len + 1] == '.' || expr[len + 1] == '=' ||
expr[len + 1] == ':' ) )
{
char t = expr[len + 1];
len += 2;
while ( 2 <= max - len &&
!(expr[len] == t && expr[len + 1] == ']') )
len++;
len += max - len < 2 ? max - len : 2;
}
else
len++;
}
if ( len < max && expr[len] == ']' )
{
for ( size_t n = 0; n <= len; n++ )
{
if ( n == 1 && expr[n] == '!' )
fputc('^', fp);
else
fputc((unsigned char) expr[n], fp);
}
i += len;
}
else
fputs("\\[", fp);
}
else if ( !escaped && c == '\\' && !(flags & GLOB_NOESCAPE) )
escaped = true;
else if ( c != '/' )
{
if ( c == '\\' || c == '(' || c == ')' || c == '{' ||
c == '}' || c == '.' || c == '*' || c == '[' ||
c == ']' || c == '^' || c == '$' || c == '+' ||
c == '?' || c == '|' )
fputc('\\', fp);
fputc(c, fp);
escaped = false;
}
}
fputc('$', fp);
if ( !fp || ferror(fp) || fflush(fp) == EOF )
{
if ( fp )
fclose(fp);
free(re);
result = GLOB_NOSPACE;
segments_count--;
break;
}
fclose(fp);
// Compile and reuse the regular expression for this segment.
int ret = regcomp(&segment->regex, re, REG_EXTENDED);
free(re);
if ( ret )
{
result = GLOB_NOSPACE;
segments_count--;
break;
}
}
offset += segment_length;
}
// Start the search with the first segment.
if ( !result && segments_count )
{
segments[0].prefix = NULL;
segments[0].prefix_length = 0;
segments[0].dir = NULL;
segments[0].done = false;
// If the first segment is non-trivial then the current working
// directory needs to be opened and searched.
if ( !segments[0].trivial && !(segments[0].dir = opendir(".")) )
{
if ( errno == ENOMEM )
result = GLOB_NOSPACE;
else if ( (errfunc && errfunc(".", errno)) || (flags & GLOB_ERR) )
result = GLOB_ABORTED;
else
segments[0].done = true;
}
}
// Search the filesystem depth first for paths matching the pattern. The
// segments array is used for the hierarchical state to avoid recursion.
// Each active segment has a directory currently being searched and yields
// paths to be explored by the subsequent segment. The last segment adds
// paths to the output array if they match the pattern. The search is
// complete when the outermost segment is done or has failed.
size_t current_segment = 0;
while ( segments_count &&
(current_segment || !(segments[0].done || result) ))
{
struct segment* segment = &segments[current_segment];
// Pop to the the parent segment if the directory has been searched or
// if an error has happened and the search is aborting.
if ( segment->done || result )
{
free(segment->prefix);
segment->prefix = NULL;
if ( segment->dir )
closedir(segment->dir);
current_segment--;
continue;
}
char* name;
size_t name_length;
unsigned char type = DT_UNKNOWN;
// A trivial segment yields only the singular path it can match.
if ( segment->trivial )
{
name = strndup(pattern + segment->start, segment->length);
name_length = segment->length;
segment->done = true;
}
// Search the directory for entries matching the pattern.
else
{
errno = 0;
struct dirent* entry = readdir(segment->dir);
if ( !entry )
{
const char* path = segment->prefix ? segment->prefix : ".";
if ( errno == ENOMEM )
result = GLOB_NOSPACE;
else if ( (errfunc && errfunc(path, errno)) ||
(flags & GLOB_ERR) )
result = GLOB_ABORTED;
segment->done = true;
continue;
}
// Skip known non-directories when a directory needs to be found.
if ( (current_segment + 1 < segments_count ||
segment->match_directory) &&
entry->d_type != DT_UNKNOWN &&
entry->d_type != DT_DIR &&
entry->d_type != DT_LNK )
continue;
if ( !strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") )
continue;
if ( entry->d_name[0] == '.' && !segment->leading_period )
continue;
if ( regexec(&segment->regex, entry->d_name, 0, NULL, 0) )
continue;
name = strdup(entry->d_name);
name_length = strlen(entry->d_name);
type = entry->d_type;
}
if ( !name )
{
result = GLOB_NOSPACE;
continue;
}
// Append the segment's prefix with the name but keep an extra byte for
// a possible trailing slash and of course the terminating nul byte.
size_t size = 0;
if ( __builtin_add_overflow(segment->prefix_length, name_length,
&size) ||
__builtin_add_overflow(size, 1 + 1, &size) )
{
free(name);
result = GLOB_NOSPACE;
continue;
}
char* path = malloc(size);
if ( !path )
{
free(name);
free(path);
result = GLOB_NOSPACE;
continue;
}
if ( segment->prefix_length )
memcpy(path, segment->prefix, segment->prefix_length);
memcpy(path + segment->prefix_length, name, name_length);
path[segment->prefix_length + name_length] = '\0';
int fd = segment->dir ? dirfd(segment->dir) : AT_FDCWD;
// If this is not the last segment, push to the next segment to search
// the directory just found.
if ( current_segment + 1 < segments_count )
{
struct segment* next_segment = &segments[current_segment + 1];
int mode = next_segment->trivial ? O_SEARCH : O_RDONLY;
int subdirfd = openat(fd, name, mode | O_DIRECTORY | O_CLOEXEC);
free(name);
next_segment->dir = subdirfd < 0 ? NULL : fdopendir(subdirfd);
if ( !next_segment->dir )
{
if ( 0 <= subdirfd )
close(subdirfd);
if ( errno != ENOENT && errno != ENOTDIR &&
((errfunc && errfunc(path, errno)) || (flags & GLOB_ERR)) )
result = GLOB_ABORTED;
free(path);
continue;
}
next_segment->prefix = path;
next_segment->prefix_length = size - 2;
// Add a trailing slash to the searched directory entries.
if ( !segment->trivial )
{
next_segment->prefix[next_segment->prefix_length++] = '/';
next_segment->prefix[next_segment->prefix_length] = '\0';
}
next_segment->done = false;
current_segment++;
continue;
}
// The last segment just needs to output paths if they exist.
else
{
bool want_slash = (flags & GLOB_MARK) || segment->match_directory;
bool exists = true, is_dir = false;
// The path is known to already exist for non-trivial segments since
// it was returned by readdir, but we may need to check if the path
// is a directory if readdir didn't tell us already.
if ( !segment->trivial &&
(!want_slash || (type != DT_UNKNOWN && type != DT_LNK)) )
is_dir = type == DT_DIR;
// Just check if the path exists if we don't add slashes to dirs.
else if ( !want_slash )
exists = !faccessat(fd, name, F_OK, AT_SYMLINK_NOFOLLOW);
// Otherwise use the slower stat operation to obtain the inode type.
else
{
struct stat st;
exists = !fstatat(fd, name, &st, AT_SYMLINK_NOFOLLOW);
if ( want_slash && S_ISLNK(st.st_mode) )
fstatat(fd, name, &st, 0);
is_dir = S_ISDIR(st.st_mode);
}
free(name);
if ( segment->match_directory && !is_dir )
{
free(path);
continue;
}
if ( want_slash && path[size - 3] != '/' )
path[size - 2] = '/', path[size - 1] = '\0';
if ( !exists )
{
if ( errno != ENOENT &&
((errfunc && errfunc(path, errno)) || (flags & GLOB_ERR)) )
result = GLOB_ABORTED;
free(path);
continue;
}
// Grow the output array as needed.
if ( gl->gl_pathc == paths_length )
{
size_t old_pathc = gl->gl_pathc ? gl->gl_pathc : 1;
if ( __builtin_mul_overflow(2, old_pathc, &pathl) ||
__builtin_add_overflow(gl->gl_offs, pathl, &pathl) ||
__builtin_add_overflow(1, pathl, &pathl) ||
!(new_pathv = reallocarray(gl->gl_pathv, pathl,
sizeof(char*))) )
{
free(path);
result = GLOB_NOSPACE;
continue;
}
gl->gl_pathv = new_pathv;
paths_length = old_pathc * 2;
}
gl->gl_pathv[gl->gl_offs + gl->gl_pathc++] = path;
}
}
// Clean up the segments and free the compiled regular expressions.
if ( segments_count && segments[0].dir )
closedir(segments[0].dir);
for ( size_t i = 0; i < segments_count; i++ )
if ( !segments[i].trivial )
regfree(&segments[i].regex);
free(segments);
// Output the input pattern if nothing matched when GLOB_NOCHECK.
if ( !result && gl->gl_pathc == initial_pathc )
{
if ( (flags & GLOB_NOCHECK) )
{
if ( (gl->gl_pathv[gl->gl_offs] = strdup(pattern)) )
gl->gl_pathc = 1;
else
result = GLOB_NOSPACE;
}
else
result = GLOB_NOMATCH;
}
// Sort the new entries per LC_COLLATE per POSIX.
if ( !(flags & GLOB_NOSORT) )
qsort(gl->gl_pathv + gl->gl_offs + initial_pathc,
gl->gl_pathc - initial_pathc, sizeof(char*),
strcoll_indirect);
gl->gl_pathv[gl->gl_offs + gl->gl_pathc] = NULL;
return result;
}

28
libc/glob/globfree.c Normal file
View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2023 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* glob/globfree.c
* Free allocated glob storage.
*/
#include <glob.h>
#include <stdlib.h>
/*
 * Release the storage held by a glob_t that was filled in by glob.
 *
 * The gl_pathc paths stored after the gl_offs reserved slots are freed along
 * with the gl_pathv array itself. The structure is then reset to the empty
 * state, so it holds no dangling pointers and calling globfree on it again
 * is harmless rather than a double free.
 */
void globfree(glob_t* gl)
{
	// The first gl_offs slots belong to the caller and are not freed here.
	if ( gl->gl_pathv )
		for ( size_t i = 0; i < gl->gl_pathc; i++ )
			free(gl->gl_pathv[gl->gl_offs + i]);
	free(gl->gl_pathv);
	gl->gl_pathv = NULL;
	gl->gl_pathc = 0;
}

62
libc/include/glob.h Normal file
View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2023 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* glob.h
* Search for paths matching a pattern.
*/
#ifndef _INCLUDE_GLOB_H
#define _INCLUDE_GLOB_H
#include <sys/cdefs.h>
#ifndef __size_t_defined
#define __size_t_defined
#define __need_size_t
#include <stddef.h>
#endif
/* Result of a glob search, to be released with globfree. */
typedef struct
{
/* Number of paths stored in gl_pathv, not counting the gl_offs reserved
   slots or the terminating null pointer. */
size_t gl_pathc;
/* Array of gl_offs reserved slots followed by gl_pathc paths and a
   terminating null pointer. */
char** gl_pathv;
/* Number of slots to reserve at the start of gl_pathv. Set by the caller and
   only honored when GLOB_DOOFFS is passed to glob. */
size_t gl_offs;
} glob_t;
/* Flags for glob, combined with bitwise or. */
/* Add the new paths after those found by an earlier call on the same glob_t. */
#define GLOB_APPEND (1 << 0)
/* Reserve gl_offs slots at the start of gl_pathv. */
#define GLOB_DOOFFS (1 << 1)
/* Abort the search when a directory cannot be opened or read. */
#define GLOB_ERR (1 << 2)
/* Mark matched directories with a trailing slash. */
#define GLOB_MARK (1 << 3)
/* Return the pattern itself if no paths matched. */
#define GLOB_NOCHECK (1 << 4)
/* Treat backslash as an ordinary character instead of an escape. */
#define GLOB_NOESCAPE (1 << 5)
/* Don't sort the matched paths. */
#define GLOB_NOSORT (1 << 6)
/* Non-zero return values of glob. */
/* The search was stopped due to an error. */
#define GLOB_ABORTED 1
/* No paths matched the pattern. */
#define GLOB_NOMATCH 2
/* Memory could not be allocated. */
#define GLOB_NOSPACE 3
#ifdef __cplusplus
extern "C" {
#endif
/* Search for paths matching the pattern (first argument) subject to the
   GLOB_* flags (second argument), optionally reporting errors to the callback
   (third argument), and store the result in the glob_t (fourth argument).
   Returns 0 on success or one of GLOB_ABORTED, GLOB_NOMATCH, GLOB_NOSPACE. */
int glob(const char* __restrict, int, int (*)(const char *, int),
glob_t* __restrict);
/* Free the paths and the array stored in a glob_t by glob. */
void globfree(glob_t*);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

View File

@ -157,77 +157,3 @@ diff -Paur --no-dereference -- libxml2.upstream/nanohttp.c libxml2/nanohttp.c
{
struct hostent *h;
struct in_addr ia;
diff -Paur --no-dereference -- libxml2.upstream/runtest.c libxml2/runtest.c
--- libxml2.upstream/runtest.c
+++ libxml2/runtest.c
@@ -198,6 +198,33 @@
}
}
+#elif defined(__sortix__)
+
+typedef struct
+{
+ size_t gl_pathc; /* Count of paths matched so far */
+ char **gl_pathv; /* List of matched pathnames. */
+ size_t gl_offs; /* Slots to reserve in 'gl_pathv'. */
+} glob_t;
+
+#define GLOB_DOOFFS 0
+static int glob(const char *pattern, int flags,
+ int errfunc(const char *epath, int eerrno),
+ glob_t *pglob) {
+ (void) pattern;
+ (void) flags;
+ (void) errfunc;
+ (void) pglob;
+ pglob->gl_pathc = 0;
+ pglob->gl_pathv = NULL;
+ pglob->gl_offs = 0;
+ return -1;
+}
+
+static void globfree(glob_t *pglob) {
+ (void) pglob;
+}
+
#else
#include <glob.h>
#endif
diff -Paur --no-dereference -- libxml2.upstream/testrecurse.c libxml2/testrecurse.c
--- libxml2.upstream/testrecurse.c
+++ libxml2/testrecurse.c
@@ -146,6 +146,33 @@
}
}
+#elif defined(__sortix__)
+
+typedef struct
+{
+ size_t gl_pathc; /* Count of paths matched so far */
+ char **gl_pathv; /* List of matched pathnames. */
+ size_t gl_offs; /* Slots to reserve in 'gl_pathv'. */
+} glob_t;
+
+#define GLOB_DOOFFS 0
+static int glob(const char *pattern, int flags,
+ int errfunc(const char *epath, int eerrno),
+ glob_t *pglob) {
+ (void) pattern;
+ (void) flags;
+ (void) errfunc;
+ (void) pglob;
+ pglob->gl_pathc = 0;
+ pglob->gl_pathv = NULL;
+ pglob->gl_offs = 0;
+ return -1;
+}
+
+static void globfree(glob_t *pglob) {
+ (void) pglob;
+}
+
#else
#include <glob.h>
#endif

View File

@ -313,7 +313,7 @@ diff -Paur --no-dereference -- mdocml.upstream/eqn.c mdocml/eqn.c
diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c
--- mdocml.upstream/main.c
+++ mdocml/main.c
@@ -19,14 +19,16 @@
@@ -19,7 +19,6 @@
#include "config.h"
#include <sys/types.h>
@ -321,50 +321,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c
#include <sys/wait.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
+/* PATCH: Sortix doesn't have glob.h at this time. */
+#if defined(__has_include) && __has_include(<glob.h>)
#include <glob.h>
+#endif
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
@@ -531,10 +533,16 @@
const char *sec, const char *arch, const char *name,
struct manpage **res, size_t *ressz)
{
+#if defined(__has_include) && __has_include(<glob.h>)
glob_t globinfo;
+#endif
struct manpage *page;
char *file;
+#if defined(__has_include) && __has_include(<glob.h>)
int form, globres;
+#else
+ int form;
+#endif
form = FORM_SRC;
mandoc_asprintf(&file, "%s/man%s/%s.%s",
@@ -559,6 +567,7 @@
free(file);
}
+#if defined(__has_include) && __has_include(<glob.h>)
mandoc_asprintf(&file, "%s/man%s/%s.*",
paths->paths[ipath], sec, name);
globres = glob(file, 0, NULL, &globinfo);
@@ -570,6 +579,7 @@
file = mandoc_strdup(*globinfo.gl_pathv);
globfree(&globinfo);
if (globres != 0)
+#endif
return(0);
found:
@@ -606,7 +616,38 @@
@@ -606,7 +605,38 @@
*res = NULL;
*ressz = lastsz = 0;
while (argc) {
@ -404,7 +361,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c
if (cfg->sec != NULL) {
if (fs_lookup(paths, ipath, cfg->sec,
cfg->arch, *argv, res, ressz) &&
@@ -989,7 +1030,7 @@
@@ -989,7 +1019,7 @@
if (pager == NULL || *pager == '\0')
pager = getenv("PAGER");
if (pager == NULL || *pager == '\0')
@ -413,7 +370,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c
cp = mandoc_strdup(pager);
/*
@@ -1014,7 +1055,7 @@
@@ -1014,7 +1044,7 @@
/* Hand over to the pager. */
execvp(argv[0], argv);

View File

@ -48,17 +48,7 @@ diff -Paur --no-dereference -- nano.upstream/src/nano.c nano/src/nano.c
diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c
--- nano.upstream/src/rcfile.c
+++ nano/src/rcfile.c
@@ -27,7 +27,9 @@
#include <ctype.h>
#include <errno.h>
+#if __has_include(<glob.h>)
#include <glob.h>
+#endif
#include <string.h>
#include <unistd.h>
@@ -577,6 +579,11 @@
@@ -577,6 +577,11 @@
* null-terminate it, and return a pointer to the succeeding text. */
char *parse_next_regex(char *ptr)
{
@ -70,7 +60,7 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c
char *starting_point = ptr;
if (*(ptr - 1) != '"') {
@@ -584,11 +591,28 @@
@@ -584,11 +589,28 @@
return NULL;
}
@ -103,7 +93,7 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c
if (*ptr == '\0') {
jot_error(N_("Regex strings must begin and end with a \" character"));
@@ -601,7 +625,8 @@
@@ -601,7 +623,8 @@
}
/* Null-terminate the regex and skip until the next non-blank. */
@ -113,32 +103,3 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c
while (isblank((unsigned char)*ptr))
ptr++;
@@ -966,8 +991,10 @@
void parse_includes(char *ptr)
{
char *pattern, *expanded;
+#if __has_include(<glob.h>)
glob_t files;
int result;
+#endif
check_for_nonempty_syntax();
@@ -978,6 +1005,7 @@
/* Expand a tilde first, then try to match the globbing pattern. */
expanded = real_dir_from_tilde(pattern);
+#if __has_include(<glob.h>)
result = glob(expanded, GLOB_ERR, NULL, &files);
/* If there are matches, process each of them. Otherwise, only
@@ -989,6 +1017,9 @@
jot_error(N_("Error expanding %s: %s"), pattern, strerror(errno));
globfree(&files);
+#else
+ parse_one_include(expanded);
+#endif
free(expanded);
}

284
sh/sh.c
View File

@ -27,6 +27,7 @@
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <glob.h>
#include <inttypes.h>
#include <ioleast.h>
#include <libgen.h>
@ -106,21 +107,6 @@ void update_env(void)
}
}
bool matches_simple_pattern(const char* string, const char* pattern)
{
size_t wildcard_index = strcspn(pattern, "*");
if ( !pattern[wildcard_index] )
return strcmp(string, pattern) == 0;
if ( pattern[0] == '*' && string[0] == '.' )
return false;
size_t string_length = strlen(string);
size_t pattern_length = strlen(pattern);
size_t pattern_last = pattern_length - (wildcard_index + 1);
return strncmp(string, pattern, wildcard_index) == 0 &&
strcmp(string + string_length - pattern_last,
pattern + wildcard_index + 1) == 0;
}
void array_shrink_free(void*** array_ptr,
size_t* used_ptr,
size_t* length_ptr,
@ -347,13 +333,6 @@ bool token_expand_variables_split(void*** out,
return result;
}
static int strcoll_indirect(const void* a_ptr, const void* b_ptr)
{
const char* a = *(const char* const*) a_ptr;
const char* b = *(const char* const*) b_ptr;
return strcoll(a, b);
}
bool token_expand_wildcards(void*** out,
size_t* out_used,
size_t* out_length,
@ -361,217 +340,112 @@ bool token_expand_wildcards(void*** out,
{
size_t old_used = *out_used;
size_t index = 0;
size_t num_escaped_wildcards = 0; // We don't properly support them yet.
struct stringbuf buf;
stringbuf_begin(&buf);
// First check if the token contains any wildcards at all.
bool escape = false;
bool single_quote = false;
bool double_quote = false;
for ( ; token[index]; index++ )
bool any_wildcards = false;
for ( size_t i = 0; token[i]; i++ )
{
if ( !escape && !single_quote && token[index] == '\\' )
{
char c = token[i];
if ( !escape && !single_quote && c == '\\' )
escape = true;
}
else if ( !escape && !double_quote && token[index] == '\'' )
{
else if ( !escape && !double_quote && c == '\'' )
single_quote = !single_quote;
}
else if ( !escape && !single_quote && token[index] == '"' )
{
else if ( !escape && !single_quote && c == '"' )
double_quote = !double_quote;
}
else if ( !(escape || single_quote || double_quote) &&
token[index] == '*' )
(c == '?' || c == '*' || c == '[') )
{
break;
any_wildcards = true;
stringbuf_append_c(&buf, c);
}
else
{
if ( escape && double_quote &&
token[index] != '$' && token[index] != '`' &&
token[index] != '"' && token[index] != '\\' )
c != '$' && c != '`' && c != '"' && c != '\\' )
stringbuf_append_c(&buf, '\\');
if ( token[index] == '*' )
num_escaped_wildcards++;
stringbuf_append_c(&buf, token[index]);
else if ( (escape || single_quote || double_quote) &&
(c == '?' || c == '*' || c == '[') )
stringbuf_append_c(&buf, '\\');
stringbuf_append_c(&buf, c);
escape = false;
}
}
if ( token[index] != '*' || num_escaped_wildcards )
char* pattern = stringbuf_finish(&buf);
if ( !pattern )
return false;
// If the token didn't contain any wildcards, just return it.
if ( !any_wildcards )
{
char* value;
free(stringbuf_finish(&buf));
free(pattern);
just_return_input:
value = strdup(token);
if ( !value )
pattern = strdup(token);
if ( !pattern )
return false;
if ( !array_add(out, out_used, out_length, value) )
return free(value), false;
if ( !array_add(out, out_used, out_length, pattern) )
return free(pattern), false;
return true;
}
char* before = stringbuf_finish(&buf);
if ( !before )
return false;
stringbuf_begin(&buf);
index++;
for ( ; token[index]; index++ )
{
if ( !escape && !single_quote && token[index] == '\\' )
{
escape = true;
}
else if ( !escape && !double_quote && token[index] == '\'' )
{
single_quote = !single_quote;
}
else if ( !escape && !single_quote && token[index] == '"' )
{
double_quote = !double_quote;
}
else if ( !(escape || single_quote || double_quote) &&
token[index] == '*' )
{
break;
}
else
{
if ( escape && double_quote &&
token[index] != '$' && token[index] != '`' &&
token[index] != '"' && token[index] != '\\' )
stringbuf_append_c(&buf, '\\');
if ( token[index] == '*' )
num_escaped_wildcards++;
stringbuf_append_c(&buf, token[index]);
escape = false;
}
}
if ( token[index] == '*' )
{
// TODO: We don't support double use of wildcards yet.
free(stringbuf_finish(&buf));
free(before);
goto just_return_input;
}
char* after = stringbuf_finish(&buf);
if ( !after )
return free(before), false;
char* pattern;
if ( asprintf(&pattern, "%s*%s", before, after) < 0 )
return free(after), free(before), false;
free(after);
free(before);
size_t wildcard_pos = strcspn(pattern, "*");
bool found_slash = false;
size_t last_slash = 0;
for ( size_t n = 0; n < wildcard_pos; n++ )
if ( pattern[n] == '/' )
last_slash = n, found_slash = true;
size_t match_from = found_slash ? last_slash + 1 : 0;
size_t pattern_prefix = 0;
DIR* dir = NULL;
if ( !found_slash )
{
if ( !(dir = opendir(".")) )
{
free(pattern);
goto just_return_input;
}
}
else
{
char* dirpath = strdup(pattern);
if ( !dirpath )
{
free(pattern);
goto just_return_input;
}
dirpath[last_slash] = '\0';
pattern_prefix = last_slash + 1;
dir = opendir(dirpath);
free(dirpath);
if ( !dir )
{
free(pattern);
goto just_return_input;
}
}
size_t num_inserted = 0;
struct dirent* entry;
while ( (entry = readdir(dir)) )
{
if ( !matches_simple_pattern(entry->d_name, pattern + match_from) )
continue;
stringbuf_begin(&buf);
for ( size_t i = 0; i < pattern_prefix; i++ )
{
if ( pattern[i] == '\n' )
{
stringbuf_append_c(&buf, '\'');
stringbuf_append_c(&buf, '\n');
stringbuf_append_c(&buf, '\'');
}
else
{
if ( might_need_shell_quote(pattern[i]) )
stringbuf_append_c(&buf, '\\');
stringbuf_append_c(&buf, pattern[i]);
}
}
for ( size_t i = 0; entry->d_name[i]; i++ )
{
if ( entry->d_name[i] == '\n' )
{
stringbuf_append_c(&buf, '\'');
stringbuf_append_c(&buf, '\n');
stringbuf_append_c(&buf, '\'');
}
else
{
if ( might_need_shell_quote(entry->d_name[i]) )
stringbuf_append_c(&buf, '\\');
stringbuf_append_c(&buf, entry->d_name[i]);
}
}
char* name = stringbuf_finish(&buf);
if ( !name )
{
free(pattern);
closedir(dir);
array_shrink_free(out, out_used, out_length, old_used);
return false;
}
if ( !array_add(out, out_used, out_length, name) )
{
free(name);
free(pattern);
closedir(dir);
array_shrink_free(out, out_used, out_length, old_used);
return false;
}
num_inserted++;
}
closedir(dir);
// Search the filesystem for paths matching the pattern.
glob_t gl;
int globerr = glob(pattern, 0, NULL, &gl);
free(pattern);
if ( num_inserted == 0 )
goto just_return_input;
char** out_tokens;
memcpy(&out_tokens, out, sizeof(out_tokens));
char** sort_from = out_tokens + old_used;
size_t sort_count = *out_used - old_used;
qsort(sort_from, sort_count, sizeof(char*), strcoll_indirect);
if ( globerr )
{
globfree(&gl);
// GLOB_NOCHECK is not used since we don't want the escaped pattern back
// since it would contain e.g. \* which is difficult to discern from a
// real file actually called \* and the original token is escaped in the
// correct fashion.
if ( globerr == GLOB_NOMATCH )
goto just_return_input;
return false;
}
// Escape the paths as tokens.
for ( size_t n = 0; n < gl.gl_pathc; n++ )
{
const char* path = gl.gl_pathv[n];
stringbuf_begin(&buf);
for ( size_t i = 0; path[i]; i++ )
{
if ( path[i] == '\n' )
{
stringbuf_append_c(&buf, '\'');
stringbuf_append_c(&buf, '\n');
stringbuf_append_c(&buf, '\'');
}
else
{
if ( might_need_shell_quote(path[i]) )
stringbuf_append_c(&buf, '\\');
stringbuf_append_c(&buf, path[i]);
}
}
char* new_token = stringbuf_finish(&buf);
if ( !new_token )
{
globfree(&gl);
array_shrink_free(out, out_used, out_length, old_used);
return false;
}
if ( !array_add(out, out_used, out_length, new_token) )
{
free(new_token);
globfree(&gl);
array_shrink_free(out, out_used, out_length, old_used);
return false;
}
}
globfree(&gl);
return true;
}