diff --git a/libc/Makefile b/libc/Makefile
index 6fde6d8e..ef4f20de 100644
--- a/libc/Makefile
+++ b/libc/Makefile
@@ -367,6 +367,8 @@ fstab/scanfsent.o \
 fstab/setfsent.o \
 getopt/getopt_long.o \
 getopt/getopt.o \
+glob/glob.o \
+glob/globfree.o \
 grp/endgrent.o \
 grp/fgetgrent.o \
 grp/fgetgrent_r.o \
diff --git a/libc/glob/glob.c b/libc/glob/glob.c
new file mode 100644
index 00000000..6b549cf3
--- /dev/null
+++ b/libc/glob/glob.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2023 Jonas 'Sortie' Termansen.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * glob/glob.c
+ * Search for paths matching a pattern.
+ */
+
+#include <sys/stat.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+// Compare two strings per LC_COLLATE through pointers to them, for qsort.
+static int strcoll_indirect(const void* a_ptr, const void* b_ptr)
+{
+	const char* a = *(const char* const*) a_ptr;
+	const char* b = *(const char* const*) b_ptr;
+	return strcoll(a, b);
+}
+
+// A portion of the pattern along with the state of searching for it. A trivial
+// segment is a literal range of the pattern to open directly, while any other
+// segment is a single path component compiled to a regular expression.
+struct segment
+{
+	char* prefix; // Path of dir that prefixes the yielded paths, or NULL.
+	size_t prefix_length;
+	DIR* dir; // Directory to look in, or NULL for the working directory.
+	bool trivial;
+	bool leading_period; // Whether entries starting with a period may match.
+	bool match_directory; // Whether the segment ends in a slash.
+	bool done; // Whether the segment has yielded all of its paths.
+	union
+	{
+		struct // Range within the pattern if trivial.
+		{
+			size_t start;
+			size_t length;
+		};
+		regex_t regex; // Compiled pattern if not trivial.
+	};
+};
+
+// Search the filesystem for the paths matching the shell wildcard pattern and
+// store them in gl->gl_pathv, counted by gl->gl_pathc and followed by a null
+// pointer. The flags are a bitwise or of the GLOB_* options. If errfunc is not
+// NULL, it is called with the path and errno whenever a path could not be
+// accessed, and the search is aborted if it returns non-zero or if GLOB_ERR
+// is set. Returns 0 on success, or otherwise GLOB_NOSPACE if out of memory or
+// the pattern could not be compiled, GLOB_ABORTED if the search was aborted,
+// or GLOB_NOMATCH if nothing matched and GLOB_NOCHECK wasn't set.
+int glob(const char* restrict pattern,
+         int flags,
+         int (*errfunc)(const char*, int),
+         glob_t* restrict gl)
+{
+	if ( !(flags & GLOB_DOOFFS) )
+		gl->gl_offs = 0;
+	if ( !(flags & GLOB_APPEND) )
+	{
+		gl->gl_pathv = NULL;
+		gl->gl_pathc = 0;
+	}
+	if ( gl->gl_offs == SIZE_MAX )
+		return GLOB_NOSPACE;
+	size_t initial_pathc = gl->gl_pathc;
+	// Reserve room for at least one string and the trailing null to prevent
+	// the possibility of late errors in the GLOB_NOCHECK case.
+	size_t pathl;
+	if ( __builtin_add_overflow(gl->gl_offs, gl->gl_pathc, &pathl) ||
+	     __builtin_add_overflow(pathl, 2, &pathl) )
+		return GLOB_NOSPACE;
+	char** new_pathv = reallocarray(gl->gl_pathv, pathl, sizeof(char*));
+	if ( !new_pathv )
+		return GLOB_NOSPACE;
+	gl->gl_pathv = new_pathv;
+	// POSIX requires the gl_offs reserved leading slots to be null pointers,
+	// which the earlier call has already seen to in the GLOB_APPEND case.
+	if ( !(flags & GLOB_APPEND) )
+		for ( size_t i = 0; i < gl->gl_offs; i++ )
+			gl->gl_pathv[i] = NULL;
+	size_t paths_length = gl->gl_pathc + 1;
+	// Parse the pattern into segments where trivial segments are fixed path
+	// components that can be directly opened and non-trivial segments require
+	// searching a directory for entries that match the pattern.
+	struct segment* segments = NULL;
+	size_t segments_count = 0;
+	size_t segments_length = 0;
+	int result = 0;
+	for ( size_t offset = 0; pattern[offset] && !result; )
+	{
+		// Combine multiple trivial path components into a trivial segment, but
+		// each non-trivial path component must be its own segment.
+		size_t segment_length = 0;
+		bool is_trivial = true;
+		for ( size_t i = 0; pattern[offset + i]; i++ )
+		{
+			if ( pattern[offset + i] == '*' ||
+			     pattern[offset + i] == '?' ||
+			     pattern[offset + i] == '[' )
+			{
+				if ( segment_length )
+					break;
+				is_trivial = false;
+			}
+			if ( pattern[offset + i] == '/' || !pattern[offset + i + 1] )
+			{
+				segment_length = i + 1;
+				if ( !is_trivial )
+					break;
+			}
+		}
+		// Grow the list of segments as needed.
+		if ( segments_count == segments_length )
+		{
+			size_t old_length = segments_length ? segments_length : 1;
+			struct segment* new_segments =
+				reallocarray(segments, old_length, 2 * sizeof(struct segment));
+			if ( !new_segments )
+			{
+				result = GLOB_NOSPACE;
+				break;
+			}
+			segments = new_segments;
+			segments_length = 2 * old_length;
+		}
+		struct segment* segment = &segments[segments_count++];
+		// The array is uninitialized, so reset the search state to ensure the
+		// cleanup is safe even if parsing a later segment fails.
+		segment->prefix = NULL;
+		segment->prefix_length = 0;
+		segment->dir = NULL;
+		segment->done = false;
+		segment->match_directory = pattern[offset + segment_length - 1] == '/';
+		// Trivial segments just contain the pattern indices to directly open.
+		if ( (segment->trivial = is_trivial) )
+		{
+			segment->start = offset;
+			segment->length = segment_length;
+		}
+		// Non-trivial segments are translated to a regular expression that is
+		// compiled right now so it can be efficiently reused during the search.
+		else
+		{
+			// Match a leading period only if the pattern explicitly starts with
+			// a period. POSIX requires that leading periods aren't matched by
+			// the * and ? and [ operators, but also are not matched by negated
+			// patterns like [^a]. It's unspecified whether [.] would match a
+			// leading period. Although regular expressions can express such
+			// patterns, it's difficult to translate, and it's much easier to
+			// just special case the leading period like this.
+			segment->leading_period = pattern[offset] == '.';
+			char* re = NULL;
+			size_t re_size;
+			FILE* fp = open_memstream(&re, &re_size);
+			if ( !fp )
+			{
+				result = GLOB_NOSPACE;
+				segments_count--;
+				break;
+			}
+			bool escaped = false;
+			fputc('^', fp);
+			// Translate the pattern to an extended regular expression.
+			for ( size_t i = 0; i < segment_length; i++ )
+			{
+				unsigned char c = pattern[offset + i];
+				if ( !escaped && c == '*' )
+					fputs(".*", fp);
+				else if ( !escaped && c == '?' )
+					fputs(".", fp);
+				else if ( !escaped && c == '[' )
+				{
+					// The whole character range is passed directly to regcomp
+					// so the correct end just has to be found, taking the edge
+					// cases into account. POSIX requires using ! instead of ^
+					// for character range negations. As an extension, ^ is
+					// also just passed directly to regcomp and works.
+					const char* expr = pattern + offset + i;
+					size_t max = segment_length - i;
+					size_t len = 1;
+					if ( len < max && (expr[len] == '!' || expr[len] == '^') )
+						len++;
+					if ( len < max && expr[len] == ']' )
+						len++;
+					while ( len < max && expr[len] != ']' )
+					{
+						if ( 2 <= max - len && expr[len] == '[' &&
+						     (expr[len + 1] == '.' || expr[len + 1] == '=' ||
+						      expr[len + 1] == ':' ) )
+						{
+							char t = expr[len + 1];
+							len += 2;
+							while ( 2 <= max - len &&
+							        !(expr[len] == t && expr[len + 1] == ']') )
+								len++;
+							len += max - len < 2 ? max - len : 2;
+						}
+						else
+							len++;
+					}
+					if ( len < max && expr[len] == ']' )
+					{
+						for ( size_t n = 0; n <= len; n++ )
+						{
+							if ( n == 1 && expr[n] == '!' )
+								fputc('^', fp);
+							else
+								fputc((unsigned char) expr[n], fp);
+						}
+						i += len;
+					}
+					else
+						fputs("\\[", fp);
+				}
+				else if ( !escaped && c == '\\' && !(flags & GLOB_NOESCAPE) )
+					escaped = true;
+				else if ( c != '/' )
+				{
+					if ( c == '\\' || c == '(' || c == ')' || c == '{' ||
+					     c == '}' || c == '.' || c == '*' || c == '[' ||
+					     c == ']' || c == '^' || c == '$' || c == '+' ||
+					     c == '?' || c == '|' )
+						fputc('\\', fp);
+					fputc(c, fp);
+					escaped = false;
+				}
+			}
+			fputc('$', fp);
+			if ( ferror(fp) || fflush(fp) == EOF )
+			{
+				fclose(fp);
+				free(re);
+				result = GLOB_NOSPACE;
+				segments_count--;
+				break;
+			}
+			fclose(fp);
+			// Compile and reuse the regular expression for this segment.
+			int ret = regcomp(&segment->regex, re, REG_EXTENDED);
+			free(re);
+			if ( ret )
+			{
+				result = GLOB_NOSPACE;
+				segments_count--;
+				break;
+			}
+		}
+		offset += segment_length;
+	}
+	// Start the search with the first segment.
+	if ( !result && segments_count )
+	{
+		// If the first segment is non-trivial then the current working
+		// directory needs to be opened and searched.
+		if ( !segments[0].trivial && !(segments[0].dir = opendir(".")) )
+		{
+			if ( errno == ENOMEM )
+				result = GLOB_NOSPACE;
+			else if ( (errfunc && errfunc(".", errno)) || (flags & GLOB_ERR) )
+				result = GLOB_ABORTED;
+			else
+				segments[0].done = true;
+		}
+	}
+	// Search the filesystem depth first for paths matching the pattern. The
+	// segments array is used for the hierarchical state to avoid recursion.
+	// Each active segment has a directory currently being searched and yields
+	// paths to be explored by the subsequent segment. The last segment adds
+	// paths to the output array if they match the pattern. The search is
+	// complete when the outermost segment is done or has failed.
+	size_t current_segment = 0;
+	while ( segments_count &&
+	        (current_segment || !(segments[0].done || result) ))
+	{
+		struct segment* segment = &segments[current_segment];
+		// Pop to the parent segment if the directory has been searched or
+		// if an error has happened and the search is aborting.
+		if ( segment->done || result )
+		{
+			free(segment->prefix);
+			segment->prefix = NULL;
+			if ( segment->dir )
+				closedir(segment->dir);
+			current_segment--;
+			continue;
+		}
+		char* name;
+		size_t name_length;
+		unsigned char type = DT_UNKNOWN;
+		// A trivial segment yields only the singular path it can match.
+		if ( segment->trivial )
+		{
+			name = strndup(pattern + segment->start, segment->length);
+			name_length = segment->length;
+			segment->done = true;
+		}
+		// Search the directory for entries matching the pattern.
+		else
+		{
+			// Reset errno to tell the end of the directory from an error.
+			errno = 0;
+			struct dirent* entry = readdir(segment->dir);
+			if ( !entry )
+			{
+				const char* path = segment->prefix ? segment->prefix : ".";
+				if ( errno == ENOMEM )
+					result = GLOB_NOSPACE;
+				else if ( errno && ((errfunc && errfunc(path, errno)) ||
+				                    (flags & GLOB_ERR)) )
+					result = GLOB_ABORTED;
+				segment->done = true;
+				continue;
+			}
+			// Skip known non-directories when a directory needs to be found.
+			if ( (current_segment + 1 < segments_count ||
+			      segment->match_directory) &&
+			     entry->d_type != DT_UNKNOWN &&
+			     entry->d_type != DT_DIR &&
+			     entry->d_type != DT_LNK )
+				continue;
+			if ( !strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") )
+				continue;
+			if ( entry->d_name[0] == '.' && !segment->leading_period )
+				continue;
+			if ( regexec(&segment->regex, entry->d_name, 0, NULL, 0) )
+				continue;
+			name = strdup(entry->d_name);
+			name_length = strlen(entry->d_name);
+			type = entry->d_type;
+		}
+		if ( !name )
+		{
+			result = GLOB_NOSPACE;
+			continue;
+		}
+		// Append the segment's prefix with the name but keep an extra byte for
+		// a possible trailing slash and of course the terminating nul byte.
+		size_t size = 0;
+		if ( __builtin_add_overflow(segment->prefix_length, name_length,
+		                            &size) ||
+		     __builtin_add_overflow(size, 1 + 1, &size) )
+		{
+			free(name);
+			result = GLOB_NOSPACE;
+			continue;
+		}
+		char* path = malloc(size);
+		if ( !path )
+		{
+			free(name);
+			result = GLOB_NOSPACE;
+			continue;
+		}
+		if ( segment->prefix_length )
+			memcpy(path, segment->prefix, segment->prefix_length);
+		memcpy(path + segment->prefix_length, name, name_length);
+		path[segment->prefix_length + name_length] = '\0';
+		int fd = segment->dir ? dirfd(segment->dir) : AT_FDCWD;
+		// If this is not the last segment, push to the next segment to search
+		// the directory just found.
+		if ( current_segment + 1 < segments_count )
+		{
+			struct segment* next_segment = &segments[current_segment + 1];
+			int mode = next_segment->trivial ? O_SEARCH : O_RDONLY;
+			int subdirfd = openat(fd, name, mode | O_DIRECTORY | O_CLOEXEC);
+			free(name);
+			next_segment->dir = subdirfd < 0 ? NULL : fdopendir(subdirfd);
+			if ( !next_segment->dir )
+			{
+				if ( 0 <= subdirfd )
+					close(subdirfd);
+				if ( errno != ENOENT && errno != ENOTDIR &&
+				     ((errfunc && errfunc(path, errno)) || (flags & GLOB_ERR)) )
+					result = GLOB_ABORTED;
+				free(path);
+				continue;
+			}
+			next_segment->prefix = path;
+			next_segment->prefix_length = size - 2;
+			// Add a trailing slash to the searched directory entries.
+			if ( !segment->trivial )
+			{
+				next_segment->prefix[next_segment->prefix_length++] = '/';
+				next_segment->prefix[next_segment->prefix_length] = '\0';
+			}
+			next_segment->done = false;
+			current_segment++;
+			continue;
+		}
+		// The last segment just needs to output paths if they exist.
+		else
+		{
+			bool want_slash = (flags & GLOB_MARK) || segment->match_directory;
+			bool exists = true, is_dir = false;
+			// The path is known to already exist for non-trivial segments since
+			// it was returned by readdir, but we may need to check if the path
+			// is a directory if readdir didn't tell us already.
+			if ( !segment->trivial &&
+			     (!want_slash || (type != DT_UNKNOWN && type != DT_LNK)) )
+				is_dir = type == DT_DIR;
+			// Just check if the path exists if we don't add slashes to dirs.
+			else if ( !want_slash )
+				exists = !faccessat(fd, name, F_OK, AT_SYMLINK_NOFOLLOW);
+			// Otherwise use the slower stat operation to obtain the inode type.
+			else
+			{
+				struct stat st;
+				exists = !fstatat(fd, name, &st, AT_SYMLINK_NOFOLLOW);
+				// st is only meaningful if the call succeeded. Follow symbolic
+				// links to find out whether they lead to a directory, where a
+				// dangling link exists but is not a directory.
+				bool known = exists;
+				if ( exists && S_ISLNK(st.st_mode) )
+					known = !fstatat(fd, name, &st, 0);
+				is_dir = known && S_ISDIR(st.st_mode);
+			}
+			free(name);
+			if ( segment->match_directory && !is_dir )
+			{
+				free(path);
+				continue;
+			}
+			// GLOB_MARK only adds the slash to directories.
+			if ( want_slash && is_dir && path[size - 3] != '/' )
+				path[size - 2] = '/', path[size - 1] = '\0';
+			if ( !exists )
+			{
+				if ( errno != ENOENT &&
+				     ((errfunc && errfunc(path, errno)) || (flags & GLOB_ERR)) )
+					result = GLOB_ABORTED;
+				free(path);
+				continue;
+			}
+			// Grow the output array as needed.
+			if ( gl->gl_pathc == paths_length )
+			{
+				size_t old_pathc = gl->gl_pathc ? gl->gl_pathc : 1;
+				if ( __builtin_mul_overflow(2, old_pathc, &pathl) ||
+				     __builtin_add_overflow(gl->gl_offs, pathl, &pathl) ||
+				     __builtin_add_overflow(1, pathl, &pathl) ||
+				     !(new_pathv = reallocarray(gl->gl_pathv, pathl,
+				                                sizeof(char*))) )
+				{
+					free(path);
+					result = GLOB_NOSPACE;
+					continue;
+				}
+				gl->gl_pathv = new_pathv;
+				paths_length = old_pathc * 2;
+			}
+			gl->gl_pathv[gl->gl_offs + gl->gl_pathc++] = path;
+		}
+	}
+	// Clean up the segments and free the compiled regular expressions.
+	if ( segments_count && segments[0].dir )
+		closedir(segments[0].dir);
+	for ( size_t i = 0; i < segments_count; i++ )
+		if ( !segments[i].trivial )
+			regfree(&segments[i].regex);
+	free(segments);
+	// Output the input pattern if nothing matched when GLOB_NOCHECK.
+	if ( !result && gl->gl_pathc == initial_pathc )
+	{
+		if ( (flags & GLOB_NOCHECK) )
+		{
+			// Append after any existing paths in the GLOB_APPEND case.
+			char* copy = strdup(pattern);
+			if ( copy )
+				gl->gl_pathv[gl->gl_offs + gl->gl_pathc++] = copy;
+			else
+				result = GLOB_NOSPACE;
+		}
+		else
+			result = GLOB_NOMATCH;
+	}
+	// Sort the new entries per LC_COLLATE per POSIX.
+	if ( !(flags & GLOB_NOSORT) )
+		qsort(gl->gl_pathv + gl->gl_offs + initial_pathc,
+		      gl->gl_pathc - initial_pathc, sizeof(char*),
+		      strcoll_indirect);
+	gl->gl_pathv[gl->gl_offs + gl->gl_pathc] = NULL;
+	return result;
+}
diff --git a/libc/glob/globfree.c b/libc/glob/globfree.c
new file mode 100644
index 00000000..1cadf64a
--- /dev/null
+++ b/libc/glob/globfree.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Jonas 'Sortie' Termansen.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * glob/globfree.c
+ * Free allocated glob storage.
+ */
+
+#include <glob.h>
+#include <stdlib.h>
+
+// Free the paths and the array allocated by glob and leave gl empty, so that
+// freeing it again is harmless.
+void globfree(glob_t* gl)
+{
+	for ( size_t i = 0; i < gl->gl_pathc; i++ )
+		free(gl->gl_pathv[gl->gl_offs + i]);
+	free(gl->gl_pathv);
+	gl->gl_pathv = NULL;
+	gl->gl_pathc = 0;
+}
diff --git a/libc/include/glob.h b/libc/include/glob.h
new file mode 100644
index 00000000..f7acfc7f
--- /dev/null
+++ b/libc/include/glob.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Jonas 'Sortie' Termansen.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * glob.h + * Search for paths matching a pattern. + */ + +#ifndef _INCLUDE_GLOB_H +#define _INCLUDE_GLOB_H + +#include + +#ifndef __size_t_defined +#define __size_t_defined +#define __need_size_t +#include +#endif + +typedef struct +{ + size_t gl_pathc; /* Number of matched paths in gl_pathv. */ + char** gl_pathv; /* Matched paths followed by a null pointer. */ + size_t gl_offs; /* Leading slots reserved in gl_pathv (GLOB_DOOFFS). */ +} glob_t; + +#define GLOB_APPEND (1 << 0) /* Keep the paths of an earlier call. */ +#define GLOB_DOOFFS (1 << 1) /* Make use of gl_offs. */ +#define GLOB_ERR (1 << 2) /* Abort when a path cannot be accessed. */ +#define GLOB_MARK (1 << 3) /* Append a slash to directories. */ +#define GLOB_NOCHECK (1 << 4) /* Yield the pattern if nothing matches. */ +#define GLOB_NOESCAPE (1 << 5) /* Backslash does not escape. */ +#define GLOB_NOSORT (1 << 6) /* Don't sort the matched paths. */ + +#define GLOB_ABORTED 1 /* The search was aborted due to an error. */ +#define GLOB_NOMATCH 2 /* Nothing matched the pattern. */ +#define GLOB_NOSPACE 3 /* Memory could not be allocated. */ + +#ifdef __cplusplus +extern "C" { +#endif + +int glob(const char* __restrict, int, int (*)(const char *, int), + glob_t* __restrict); +void globfree(glob_t*); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/ports/libxml2/libxml2.patch b/ports/libxml2/libxml2.patch index 75cab5bb..e025d244 100644 --- a/ports/libxml2/libxml2.patch +++ b/ports/libxml2/libxml2.patch @@ -157,77 +157,3 @@ diff -Paur --no-dereference -- libxml2.upstream/nanohttp.c libxml2/nanohttp.c { struct hostent *h; struct in_addr ia; -diff -Paur --no-dereference -- libxml2.upstream/runtest.c libxml2/runtest.c ---- libxml2.upstream/runtest.c -+++ libxml2/runtest.c -@@ -198,6 +198,33 @@ - } - } - -+#elif defined(__sortix__) -+ -+typedef struct -+{ -+ size_t gl_pathc; /* Count of paths matched so far */ -+ char **gl_pathv; /* List of matched pathnames. 
*/ -+ size_t gl_offs; /* Slots to reserve in 'gl_pathv'. */ -+} glob_t; -+ -+#define GLOB_DOOFFS 0 -+static int glob(const char *pattern, int flags, -+ int errfunc(const char *epath, int eerrno), -+ glob_t *pglob) { -+ (void) pattern; -+ (void) flags; -+ (void) errfunc; -+ (void) pglob; -+ pglob->gl_pathc = 0; -+ pglob->gl_pathv = NULL; -+ pglob->gl_offs = 0; -+ return -1; -+} -+ -+static void globfree(glob_t *pglob) { -+ (void) pglob; -+} -+ - #else - #include - #endif -diff -Paur --no-dereference -- libxml2.upstream/testrecurse.c libxml2/testrecurse.c ---- libxml2.upstream/testrecurse.c -+++ libxml2/testrecurse.c -@@ -146,6 +146,33 @@ - } - } - -+#elif defined(__sortix__) -+ -+typedef struct -+{ -+ size_t gl_pathc; /* Count of paths matched so far */ -+ char **gl_pathv; /* List of matched pathnames. */ -+ size_t gl_offs; /* Slots to reserve in 'gl_pathv'. */ -+} glob_t; -+ -+#define GLOB_DOOFFS 0 -+static int glob(const char *pattern, int flags, -+ int errfunc(const char *epath, int eerrno), -+ glob_t *pglob) { -+ (void) pattern; -+ (void) flags; -+ (void) errfunc; -+ (void) pglob; -+ pglob->gl_pathc = 0; -+ pglob->gl_pathv = NULL; -+ pglob->gl_offs = 0; -+ return -1; -+} -+ -+static void globfree(glob_t *pglob) { -+ (void) pglob; -+} -+ - #else - #include - #endif diff --git a/ports/mdocml/mdocml.patch b/ports/mdocml/mdocml.patch index 9cc8d3a4..1b90f479 100644 --- a/ports/mdocml/mdocml.patch +++ b/ports/mdocml/mdocml.patch @@ -313,7 +313,7 @@ diff -Paur --no-dereference -- mdocml.upstream/eqn.c mdocml/eqn.c diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c --- mdocml.upstream/main.c +++ mdocml/main.c -@@ -19,14 +19,16 @@ +@@ -19,7 +19,6 @@ #include "config.h" #include @@ -321,50 +321,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c #include #include - #include - #include - #include -+/* PATCH: Sortix doesn't have glob.h at this time. 
*/ -+#if defined(__has_include) && __has_include() - #include -+#endif - #include - #include - #include -@@ -531,10 +533,16 @@ - const char *sec, const char *arch, const char *name, - struct manpage **res, size_t *ressz) - { -+#if defined(__has_include) && __has_include() - glob_t globinfo; -+#endif - struct manpage *page; - char *file; -+#if defined(__has_include) && __has_include() - int form, globres; -+#else -+ int form; -+#endif - - form = FORM_SRC; - mandoc_asprintf(&file, "%s/man%s/%s.%s", -@@ -559,6 +567,7 @@ - free(file); - } - -+#if defined(__has_include) && __has_include() - mandoc_asprintf(&file, "%s/man%s/%s.*", - paths->paths[ipath], sec, name); - globres = glob(file, 0, NULL, &globinfo); -@@ -570,6 +579,7 @@ - file = mandoc_strdup(*globinfo.gl_pathv); - globfree(&globinfo); - if (globres != 0) -+#endif - return(0); - - found: -@@ -606,7 +616,38 @@ +@@ -606,7 +605,38 @@ *res = NULL; *ressz = lastsz = 0; while (argc) { @@ -404,7 +361,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c if (cfg->sec != NULL) { if (fs_lookup(paths, ipath, cfg->sec, cfg->arch, *argv, res, ressz) && -@@ -989,7 +1030,7 @@ +@@ -989,7 +1019,7 @@ if (pager == NULL || *pager == '\0') pager = getenv("PAGER"); if (pager == NULL || *pager == '\0') @@ -413,7 +370,7 @@ diff -Paur --no-dereference -- mdocml.upstream/main.c mdocml/main.c cp = mandoc_strdup(pager); /* -@@ -1014,7 +1055,7 @@ +@@ -1014,7 +1044,7 @@ /* Hand over to the pager. 
*/ execvp(argv[0], argv); diff --git a/ports/nano/nano.patch b/ports/nano/nano.patch index 9ce1818b..b7b22612 100644 --- a/ports/nano/nano.patch +++ b/ports/nano/nano.patch @@ -48,17 +48,7 @@ diff -Paur --no-dereference -- nano.upstream/src/nano.c nano/src/nano.c diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c --- nano.upstream/src/rcfile.c +++ nano/src/rcfile.c -@@ -27,7 +27,9 @@ - - #include - #include -+#if __has_include() - #include -+#endif - #include - #include - -@@ -577,6 +579,11 @@ +@@ -577,6 +577,11 @@ * null-terminate it, and return a pointer to the succeeding text. */ char *parse_next_regex(char *ptr) { @@ -70,7 +60,7 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c char *starting_point = ptr; if (*(ptr - 1) != '"') { -@@ -584,11 +591,28 @@ +@@ -584,11 +589,28 @@ return NULL; } @@ -103,7 +93,7 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c if (*ptr == '\0') { jot_error(N_("Regex strings must begin and end with a \" character")); -@@ -601,7 +625,8 @@ +@@ -601,7 +623,8 @@ } /* Null-terminate the regex and skip until the next non-blank. */ @@ -113,32 +103,3 @@ diff -Paur --no-dereference -- nano.upstream/src/rcfile.c nano/src/rcfile.c while (isblank((unsigned char)*ptr)) ptr++; -@@ -966,8 +991,10 @@ - void parse_includes(char *ptr) - { - char *pattern, *expanded; -+#if __has_include() - glob_t files; - int result; -+#endif - - check_for_nonempty_syntax(); - -@@ -978,6 +1005,7 @@ - - /* Expand a tilde first, then try to match the globbing pattern. */ - expanded = real_dir_from_tilde(pattern); -+#if __has_include() - result = glob(expanded, GLOB_ERR, NULL, &files); - - /* If there are matches, process each of them. 
Otherwise, only -@@ -989,6 +1017,9 @@ - jot_error(N_("Error expanding %s: %s"), pattern, strerror(errno)); - - globfree(&files); -+#else -+ parse_one_include(expanded); -+#endif - free(expanded); - } - diff --git a/sh/sh.c b/sh/sh.c index 2b382245..c257445f 100644 --- a/sh/sh.c +++ b/sh/sh.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -106,21 +107,6 @@ void update_env(void) } } -bool matches_simple_pattern(const char* string, const char* pattern) -{ - size_t wildcard_index = strcspn(pattern, "*"); - if ( !pattern[wildcard_index] ) - return strcmp(string, pattern) == 0; - if ( pattern[0] == '*' && string[0] == '.' ) - return false; - size_t string_length = strlen(string); - size_t pattern_length = strlen(pattern); - size_t pattern_last = pattern_length - (wildcard_index + 1); - return strncmp(string, pattern, wildcard_index) == 0 && - strcmp(string + string_length - pattern_last, - pattern + wildcard_index + 1) == 0; -} - void array_shrink_free(void*** array_ptr, size_t* used_ptr, size_t* length_ptr, @@ -347,13 +333,6 @@ bool token_expand_variables_split(void*** out, return result; } -static int strcoll_indirect(const void* a_ptr, const void* b_ptr) -{ - const char* a = *(const char* const*) a_ptr; - const char* b = *(const char* const*) b_ptr; - return strcoll(a, b); -} - bool token_expand_wildcards(void*** out, size_t* out_used, size_t* out_length, @@ -361,217 +340,112 @@ bool token_expand_wildcards(void*** out, { size_t old_used = *out_used; - size_t index = 0; - size_t num_escaped_wildcards = 0; // We don't properly support them yet. - struct stringbuf buf; stringbuf_begin(&buf); + // First check if the token contains any wildcards at all. 
bool escape = false; bool single_quote = false; bool double_quote = false; - for ( ; token[index]; index++ ) + bool any_wildcards = false; + for ( size_t i = 0; token[i]; i++ ) { - if ( !escape && !single_quote && token[index] == '\\' ) - { + char c = token[i]; + if ( !escape && !single_quote && c == '\\' ) escape = true; - } - else if ( !escape && !double_quote && token[index] == '\'' ) - { + else if ( !escape && !double_quote && c == '\'' ) single_quote = !single_quote; - } - else if ( !escape && !single_quote && token[index] == '"' ) - { + else if ( !escape && !single_quote && c == '"' ) double_quote = !double_quote; - } else if ( !(escape || single_quote || double_quote) && - token[index] == '*' ) + (c == '?' || c == '*' || c == '[') ) { - break; + any_wildcards = true; + stringbuf_append_c(&buf, c); } else { if ( escape && double_quote && - token[index] != '$' && token[index] != '`' && - token[index] != '"' && token[index] != '\\' ) + c != '$' && c != '`' && c != '"' && c != '\\' ) stringbuf_append_c(&buf, '\\'); - if ( token[index] == '*' ) - num_escaped_wildcards++; - stringbuf_append_c(&buf, token[index]); + else if ( (escape || single_quote || double_quote) && + (c == '?' || c == '*' || c == '[') ) + stringbuf_append_c(&buf, '\\'); + stringbuf_append_c(&buf, c); escape = false; } } - if ( token[index] != '*' || num_escaped_wildcards ) + char* pattern = stringbuf_finish(&buf); + if ( !pattern ) + return false; + + // If the token didn't contain any wildcards, just return it. 
+ if ( !any_wildcards ) { - char* value; - free(stringbuf_finish(&buf)); + free(pattern); just_return_input: - value = strdup(token); - if ( !value ) + pattern = strdup(token); + if ( !pattern ) return false; - if ( !array_add(out, out_used, out_length, value) ) - return free(value), false; + if ( !array_add(out, out_used, out_length, pattern) ) + return free(pattern), false; return true; } - char* before = stringbuf_finish(&buf); - if ( !before ) - return false; - stringbuf_begin(&buf); - - index++; - - for ( ; token[index]; index++ ) - { - if ( !escape && !single_quote && token[index] == '\\' ) - { - escape = true; - } - else if ( !escape && !double_quote && token[index] == '\'' ) - { - single_quote = !single_quote; - } - else if ( !escape && !single_quote && token[index] == '"' ) - { - double_quote = !double_quote; - } - else if ( !(escape || single_quote || double_quote) && - token[index] == '*' ) - { - break; - } - else - { - if ( escape && double_quote && - token[index] != '$' && token[index] != '`' && - token[index] != '"' && token[index] != '\\' ) - stringbuf_append_c(&buf, '\\'); - if ( token[index] == '*' ) - num_escaped_wildcards++; - stringbuf_append_c(&buf, token[index]); - escape = false; - } - } - - if ( token[index] == '*' ) - { - // TODO: We don't support double use of wildcards yet. - free(stringbuf_finish(&buf)); - free(before); - goto just_return_input; - } - - char* after = stringbuf_finish(&buf); - if ( !after ) - return free(before), false; - - char* pattern; - if ( asprintf(&pattern, "%s*%s", before, after) < 0 ) - return free(after), free(before), false; - free(after); - free(before); - - size_t wildcard_pos = strcspn(pattern, "*"); - bool found_slash = false; - size_t last_slash = 0; - for ( size_t n = 0; n < wildcard_pos; n++ ) - if ( pattern[n] == '/' ) - last_slash = n, found_slash = true; - size_t match_from = found_slash ? 
last_slash + 1 : 0; - - size_t pattern_prefix = 0; - DIR* dir = NULL; - if ( !found_slash ) - { - if ( !(dir = opendir(".")) ) - { - free(pattern); - goto just_return_input; - } - } - else - { - char* dirpath = strdup(pattern); - if ( !dirpath ) - { - free(pattern); - goto just_return_input; - } - dirpath[last_slash] = '\0'; - pattern_prefix = last_slash + 1; - dir = opendir(dirpath); - free(dirpath); - if ( !dir ) - { - free(pattern); - goto just_return_input; - } - } - size_t num_inserted = 0; - struct dirent* entry; - while ( (entry = readdir(dir)) ) - { - if ( !matches_simple_pattern(entry->d_name, pattern + match_from) ) - continue; - stringbuf_begin(&buf); - for ( size_t i = 0; i < pattern_prefix; i++ ) - { - if ( pattern[i] == '\n' ) - { - stringbuf_append_c(&buf, '\''); - stringbuf_append_c(&buf, '\n'); - stringbuf_append_c(&buf, '\''); - } - else - { - if ( might_need_shell_quote(pattern[i]) ) - stringbuf_append_c(&buf, '\\'); - stringbuf_append_c(&buf, pattern[i]); - } - } - for ( size_t i = 0; entry->d_name[i]; i++ ) - { - if ( entry->d_name[i] == '\n' ) - { - stringbuf_append_c(&buf, '\''); - stringbuf_append_c(&buf, '\n'); - stringbuf_append_c(&buf, '\''); - } - else - { - if ( might_need_shell_quote(entry->d_name[i]) ) - stringbuf_append_c(&buf, '\\'); - stringbuf_append_c(&buf, entry->d_name[i]); - } - } - char* name = stringbuf_finish(&buf); - if ( !name ) - { - free(pattern); - closedir(dir); - array_shrink_free(out, out_used, out_length, old_used); - return false; - } - if ( !array_add(out, out_used, out_length, name) ) - { - free(name); - free(pattern); - closedir(dir); - array_shrink_free(out, out_used, out_length, old_used); - return false; - } - num_inserted++; - } - closedir(dir); + // Search the filesystem for paths matching the pattern. 
+ glob_t gl; + int globerr = glob(pattern, 0, NULL, &gl); free(pattern); - if ( num_inserted == 0 ) - goto just_return_input; - char** out_tokens; - memcpy(&out_tokens, out, sizeof(out_tokens)); - char** sort_from = out_tokens + old_used; - size_t sort_count = *out_used - old_used; - qsort(sort_from, sort_count, sizeof(char*), strcoll_indirect); + if ( globerr ) + { + globfree(&gl); + // GLOB_NOCHECK is not used since we don't want the escaped pattern back + // since it would contain e.g. \* which is difficult to discern from a + // real file actually called \* and the original token is escaped in the + // correct fashion. + if ( globerr == GLOB_NOMATCH ) + goto just_return_input; + return false; + } + + // Escape the paths as tokens. + for ( size_t n = 0; n < gl.gl_pathc; n++ ) + { + const char* path = gl.gl_pathv[n]; + stringbuf_begin(&buf); + for ( size_t i = 0; path[i]; i++ ) + { + if ( path[i] == '\n' ) + { + stringbuf_append_c(&buf, '\''); + stringbuf_append_c(&buf, '\n'); + stringbuf_append_c(&buf, '\''); + } + else + { + if ( might_need_shell_quote(path[i]) ) + stringbuf_append_c(&buf, '\\'); + stringbuf_append_c(&buf, path[i]); + } + } + char* new_token = stringbuf_finish(&buf); + if ( !new_token ) + { + globfree(&gl); + array_shrink_free(out, out_used, out_length, old_used); + return false; + } + if ( !array_add(out, out_used, out_length, new_token) ) + { + free(new_token); + globfree(&gl); + array_shrink_free(out, out_used, out_length, old_used); + return false; + } + } + globfree(&gl); + return true; }