| /*************************************************************************** |
| * _ _ ____ _ |
| * Project ___| | | | _ \| | |
| * / __| | | | |_) | | |
| * | (__| |_| | _ <| |___ |
| * \___|\___/|_| \_\_____| |
| * |
| * Copyright (C) Daniel Stenberg, <[email protected]>, et al. |
| * |
| * This software is licensed as described in the file COPYING, which |
| * you should have received as part of this distribution. The terms |
| * are also available at https://curl.se/docs/copyright.html. |
| * |
| * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
| * copies of the Software, and permit persons to whom the Software is |
| * furnished to do so, under the terms of the COPYING file. |
| * |
| * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| * KIND, either express or implied. |
| * |
| * SPDX-License-Identifier: curl |
| * |
| ***************************************************************************/ |
| #include "tool_setup.h" |
| |
| #define ENABLE_CURLX_PRINTF |
| /* use our own printf() functions */ |
| #include "curlx.h" |
| #include "tool_cfgable.h" |
| #include "tool_doswin.h" |
| #include "tool_urlglob.h" |
| #include "tool_vms.h" |
| #include "dynbuf.h" |
| |
| #include "memdebug.h" /* keep this as LAST include */ |
| |
/*
 * GLOBERROR() stores an error description and the URL column it refers to
 * in the struct URLGlob pointed to by a variable named 'glob', which must be
 * in scope where the macro is used. The whole expression evaluates to
 * 'code', so it can be used directly as the operand of a return statement.
 *
 * The expansion and its arguments are parenthesized so that the macro
 * behaves as a single expression in any context.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
| |
| static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len) |
| { |
| struct URLPattern *pat = &glob->pattern[glob->size]; |
| pat->type = UPTSet; |
| pat->content.Set.size = 1; |
| pat->content.Set.ptr_s = 0; |
| pat->globindex = -1; |
| |
| pat->content.Set.elements = malloc(sizeof(char *)); |
| |
| if(!pat->content.Set.elements) |
| return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); |
| |
| pat->content.Set.elements[0] = malloc(len + 1); |
| if(!pat->content.Set.elements[0]) |
| return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); |
| |
| memcpy(pat->content.Set.elements[0], fixed, len); |
| pat->content.Set.elements[0][len] = 0; |
| |
| return CURLE_OK; |
| } |
| |
/*
 * multiply()
 *
 * Scales *amount by 'with' and detects unsigned overflow.
 *
 * Returns 0 on success with the product stored in *amount (a factor of
 * zero stores zero). Returns 1 if the product does not fit in an unsigned
 * long, in which case *amount is left untouched.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long factor = (unsigned long)with;
  unsigned long product;

  if(!factor) {
    *amount = 0;
    return 0;
  }

  product = *amount * factor;

  /* if the multiplication wrapped, dividing back cannot give the original */
  if(product / factor != *amount)
    return 1;

  *amount = product;
  return 0;
}
| |
| static CURLcode glob_set(struct URLGlob *glob, char **patternp, |
| size_t *posp, unsigned long *amount, |
| int globindex) |
| { |
| /* processes a set expression with the point behind the opening '{' |
| ','-separated elements are collected until the next closing '}' |
| */ |
| struct URLPattern *pat; |
| bool done = FALSE; |
| char *buf = glob->glob_buffer; |
| char *pattern = *patternp; |
| char *opattern = pattern; |
| size_t opos = *posp-1; |
| |
| pat = &glob->pattern[glob->size]; |
| /* patterns 0,1,2,... correspond to size=1,3,5,... */ |
| pat->type = UPTSet; |
| pat->content.Set.size = 0; |
| pat->content.Set.ptr_s = 0; |
| pat->content.Set.elements = NULL; |
| pat->globindex = globindex; |
| |
| while(!done) { |
| switch (*pattern) { |
| case '\0': /* URL ended while set was still open */ |
| return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); |
| |
| case '{': |
| case '[': /* no nested expressions at this time */ |
| return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); |
| |
| case '}': /* set element completed */ |
| if(opattern == pattern) |
| return GLOBERROR("empty string within braces", *posp, |
| CURLE_URL_MALFORMAT); |
| |
| /* add 1 to size since it'll be incremented below */ |
| if(multiply(amount, pat->content.Set.size + 1)) |
| return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); |
| |
| /* FALLTHROUGH */ |
| case ',': |
| |
| *buf = '\0'; |
| if(pat->content.Set.elements) { |
| char **new_arr = realloc(pat->content.Set.elements, |
| (pat->content.Set.size + 1) * sizeof(char *)); |
| if(!new_arr) |
| return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); |
| |
| pat->content.Set.elements = new_arr; |
| } |
| else |
| pat->content.Set.elements = malloc(sizeof(char *)); |
| |
| if(!pat->content.Set.elements) |
| return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); |
| |
| pat->content.Set.elements[pat->content.Set.size] = |
| strdup(glob->glob_buffer); |
| if(!pat->content.Set.elements[pat->content.Set.size]) |
| return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); |
| ++pat->content.Set.size; |
| |
| if(*pattern == '}') { |
| pattern++; /* pass the closing brace */ |
| done = TRUE; |
| continue; |
| } |
| |
| buf = glob->glob_buffer; |
| ++pattern; |
| ++(*posp); |
| break; |
| |
| case ']': /* illegal closing bracket */ |
| return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); |
| |
| case '\\': /* escaped character, skip '\' */ |
| if(pattern[1]) { |
| ++pattern; |
| ++(*posp); |
| } |
| /* FALLTHROUGH */ |
| default: |
| *buf++ = *pattern++; /* copy character to set element */ |
| ++(*posp); |
| } |
| } |
| |
| *patternp = pattern; /* return with the new position */ |
| return CURLE_OK; |
| } |
| |
| static CURLcode glob_range(struct URLGlob *glob, char **patternp, |
| size_t *posp, unsigned long *amount, |
| int globindex) |
| { |
| /* processes a range expression with the point behind the opening '[' |
| - char range: e.g. "a-z]", "B-Q]" |
| - num range: e.g. "0-9]", "17-2000]" |
| - num range with leading zeros: e.g. "001-999]" |
| expression is checked for well-formedness and collected until the next ']' |
| */ |
| struct URLPattern *pat; |
| int rc; |
| char *pattern = *patternp; |
| char *c; |
| |
| pat = &glob->pattern[glob->size]; |
| pat->globindex = globindex; |
| |
| if(ISALPHA(*pattern)) { |
| /* character range detected */ |
| char min_c; |
| char max_c; |
| char end_c; |
| unsigned long step = 1; |
| |
| pat->type = UPTCharRange; |
| |
| rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c); |
| |
| if(rc == 3) { |
| if(end_c == ':') { |
| char *endp; |
| errno = 0; |
| step = strtoul(&pattern[4], &endp, 10); |
| if(errno || &pattern[4] == endp || *endp != ']') |
| step = 0; |
| else |
| pattern = endp + 1; |
| } |
| else if(end_c != ']') |
| /* then this is wrong */ |
| rc = 0; |
| else |
| /* end_c == ']' */ |
| pattern += 4; |
| } |
| |
| *posp += (pattern - *patternp); |
| |
| if(rc != 3 || !step || step > (unsigned)INT_MAX || |
| (min_c == max_c && step != 1) || |
| (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) || |
| (max_c - min_c) > ('z' - 'a')))) |
| /* the pattern is not well-formed */ |
| return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); |
| |
| /* if there was a ":[num]" thing, use that as step or else use 1 */ |
| pat->content.CharRange.step = (int)step; |
| pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; |
| pat->content.CharRange.max_c = max_c; |
| |
| if(multiply(amount, ((pat->content.CharRange.max_c - |
| pat->content.CharRange.min_c) / |
| pat->content.CharRange.step + 1))) |
| return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); |
| } |
| else if(ISDIGIT(*pattern)) { |
| /* numeric range detected */ |
| unsigned long min_n; |
| unsigned long max_n = 0; |
| unsigned long step_n = 0; |
| char *endp; |
| |
| pat->type = UPTNumRange; |
| pat->content.NumRange.padlength = 0; |
| |
| if(*pattern == '0') { |
| /* leading zero specified, count them! */ |
| c = pattern; |
| while(ISDIGIT(*c)) { |
| c++; |
| ++pat->content.NumRange.padlength; /* padding length is set for all |
| instances of this pattern */ |
| } |
| } |
| |
| errno = 0; |
| min_n = strtoul(pattern, &endp, 10); |
| if(errno || (endp == pattern)) |
| endp = NULL; |
| else { |
| if(*endp != '-') |
| endp = NULL; |
| else { |
| pattern = endp + 1; |
| while(*pattern && ISBLANK(*pattern)) |
| pattern++; |
| if(!ISDIGIT(*pattern)) { |
| endp = NULL; |
| goto fail; |
| } |
| errno = 0; |
| max_n = strtoul(pattern, &endp, 10); |
| if(errno) |
| /* overflow */ |
| endp = NULL; |
| else if(*endp == ':') { |
| pattern = endp + 1; |
| errno = 0; |
| step_n = strtoul(pattern, &endp, 10); |
| if(errno) |
| /* over/underflow situation */ |
| endp = NULL; |
| } |
| else |
| step_n = 1; |
| if(endp && (*endp == ']')) { |
| pattern = endp + 1; |
| } |
| else |
| endp = NULL; |
| } |
| } |
| |
| fail: |
| *posp += (pattern - *patternp); |
| |
| if(!endp || !step_n || |
| (min_n == max_n && step_n != 1) || |
| (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n)))) |
| /* the pattern is not well-formed */ |
| return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); |
| |
| /* typecasting to ints are fine here since we make sure above that we |
| are within 31 bits */ |
| pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; |
| pat->content.NumRange.max_n = max_n; |
| pat->content.NumRange.step = step_n; |
| |
| if(multiply(amount, ((pat->content.NumRange.max_n - |
| pat->content.NumRange.min_n) / |
| pat->content.NumRange.step + 1))) |
| return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); |
| } |
| else |
| return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); |
| |
| *patternp = pattern; |
| return CURLE_OK; |
| } |
| |
| #define MAX_IP6LEN 128 |
| |
| static bool peek_ipv6(const char *str, size_t *skip) |
| { |
| /* |
| * Scan for a potential IPv6 literal. |
| * - Valid globs contain a hyphen and <= 1 colon. |
| * - IPv6 literals contain no hyphens and >= 2 colons. |
| */ |
| char hostname[MAX_IP6LEN]; |
| CURLU *u; |
| char *endbr = strchr(str, ']'); |
| size_t hlen; |
| CURLUcode rc; |
| if(!endbr) |
| return FALSE; |
| |
| hlen = endbr - str + 1; |
| if(hlen >= MAX_IP6LEN) |
| return FALSE; |
| |
| u = curl_url(); |
| if(!u) |
| return FALSE; |
| |
| memcpy(hostname, str, hlen); |
| hostname[hlen] = 0; |
| |
| /* ask to "guess scheme" as then it works without an https:// prefix */ |
| rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME); |
| |
| curl_url_cleanup(u); |
| if(!rc) |
| *skip = hlen; |
| return rc ? FALSE : TRUE; |
| } |
| |
| static CURLcode glob_parse(struct URLGlob *glob, char *pattern, |
| size_t pos, unsigned long *amount) |
| { |
| /* processes a literal string component of a URL |
| special characters '{' and '[' branch to set/range processing functions |
| */ |
| CURLcode res = CURLE_OK; |
| int globindex = 0; /* count "actual" globs */ |
| |
| *amount = 1; |
| |
| while(*pattern && !res) { |
| char *buf = glob->glob_buffer; |
| size_t sublen = 0; |
| while(*pattern && *pattern != '{') { |
| if(*pattern == '[') { |
| /* skip over IPv6 literals and [] */ |
| size_t skip = 0; |
| if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']')) |
| skip = 2; |
| if(skip) { |
| memcpy(buf, pattern, skip); |
| buf += skip; |
| pattern += skip; |
| sublen += skip; |
| continue; |
| } |
| break; |
| } |
| if(*pattern == '}' || *pattern == ']') |
| return GLOBERROR("unmatched close brace/bracket", pos, |
| CURLE_URL_MALFORMAT); |
| |
| /* only allow \ to escape known "special letters" */ |
| if(*pattern == '\\' && |
| (*(pattern + 1) == '{' || *(pattern + 1) == '[' || |
| *(pattern + 1) == '}' || *(pattern + 1) == ']') ) { |
| |
| /* escape character, skip '\' */ |
| ++pattern; |
| ++pos; |
| } |
| *buf++ = *pattern++; /* copy character to literal */ |
| ++pos; |
| sublen++; |
| } |
| if(sublen) { |
| /* we got a literal string, add it as a single-item list */ |
| *buf = '\0'; |
| res = glob_fixed(glob, glob->glob_buffer, sublen); |
| } |
| else { |
| switch (*pattern) { |
| case '\0': /* done */ |
| break; |
| |
| case '{': |
| /* process set pattern */ |
| pattern++; |
| pos++; |
| res = glob_set(glob, &pattern, &pos, amount, globindex++); |
| break; |
| |
| case '[': |
| /* process range pattern */ |
| pattern++; |
| pos++; |
| res = glob_range(glob, &pattern, &pos, amount, globindex++); |
| break; |
| } |
| } |
| |
| if(++glob->size >= GLOB_PATTERN_NUM) |
| return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); |
| } |
| return res; |
| } |
| |
| CURLcode glob_url(struct URLGlob **glob, char *url, unsigned long *urlnum, |
| FILE *error) |
| { |
| /* |
| * We can deal with any-size, just make a buffer with the same length |
| * as the specified URL! |
| */ |
| struct URLGlob *glob_expand; |
| unsigned long amount = 0; |
| char *glob_buffer; |
| CURLcode res; |
| |
| *glob = NULL; |
| |
| glob_buffer = malloc(strlen(url) + 1); |
| if(!glob_buffer) |
| return CURLE_OUT_OF_MEMORY; |
| glob_buffer[0] = 0; |
| |
| glob_expand = calloc(1, sizeof(struct URLGlob)); |
| if(!glob_expand) { |
| Curl_safefree(glob_buffer); |
| return CURLE_OUT_OF_MEMORY; |
| } |
| glob_expand->urllen = strlen(url); |
| glob_expand->glob_buffer = glob_buffer; |
| |
| res = glob_parse(glob_expand, url, 1, &amount); |
| if(!res) |
| *urlnum = amount; |
| else { |
| if(error && glob_expand->error) { |
| char text[512]; |
| const char *t; |
| if(glob_expand->pos) { |
| msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^", |
| glob_expand->error, |
| glob_expand->pos, url, (int)glob_expand->pos - 1, " "); |
| t = text; |
| } |
| else |
| t = glob_expand->error; |
| |
| /* send error description to the error-stream */ |
| fprintf(error, "curl: (%d) %s\n", res, t); |
| } |
| /* it failed, we cleanup */ |
| glob_cleanup(glob_expand); |
| *urlnum = 1; |
| return res; |
| } |
| |
| *glob = glob_expand; |
| return CURLE_OK; |
| } |
| |
| void glob_cleanup(struct URLGlob *glob) |
| { |
| size_t i; |
| int elem; |
| |
| if(!glob) |
| return; |
| |
| for(i = 0; i < glob->size; i++) { |
| if((glob->pattern[i].type == UPTSet) && |
| (glob->pattern[i].content.Set.elements)) { |
| for(elem = glob->pattern[i].content.Set.size - 1; |
| elem >= 0; |
| --elem) { |
| Curl_safefree(glob->pattern[i].content.Set.elements[elem]); |
| } |
| Curl_safefree(glob->pattern[i].content.Set.elements); |
| } |
| } |
| Curl_safefree(glob->glob_buffer); |
| Curl_safefree(glob); |
| } |
| |
| CURLcode glob_next_url(char **globbed, struct URLGlob *glob) |
| { |
| struct URLPattern *pat; |
| size_t i; |
| size_t len; |
| size_t buflen = glob->urllen + 1; |
| char *buf = glob->glob_buffer; |
| |
| *globbed = NULL; |
| |
| if(!glob->beenhere) |
| glob->beenhere = 1; |
| else { |
| bool carry = TRUE; |
| |
| /* implement a counter over the index ranges of all patterns, starting |
| with the rightmost pattern */ |
| for(i = 0; carry && (i < glob->size); i++) { |
| carry = FALSE; |
| pat = &glob->pattern[glob->size - 1 - i]; |
| switch(pat->type) { |
| case UPTSet: |
| if((pat->content.Set.elements) && |
| (++pat->content.Set.ptr_s == pat->content.Set.size)) { |
| pat->content.Set.ptr_s = 0; |
| carry = TRUE; |
| } |
| break; |
| case UPTCharRange: |
| pat->content.CharRange.ptr_c = |
| (char)(pat->content.CharRange.step + |
| (int)((unsigned char)pat->content.CharRange.ptr_c)); |
| if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { |
| pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; |
| carry = TRUE; |
| } |
| break; |
| case UPTNumRange: |
| pat->content.NumRange.ptr_n += pat->content.NumRange.step; |
| if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { |
| pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; |
| carry = TRUE; |
| } |
| break; |
| default: |
| printf("internal error: invalid pattern type (%d)\n", (int)pat->type); |
| return CURLE_FAILED_INIT; |
| } |
| } |
| if(carry) { /* first pattern ptr has run into overflow, done! */ |
| return CURLE_OK; |
| } |
| } |
| |
| for(i = 0; i < glob->size; ++i) { |
| pat = &glob->pattern[i]; |
| switch(pat->type) { |
| case UPTSet: |
| if(pat->content.Set.elements) { |
| msnprintf(buf, buflen, "%s", |
| pat->content.Set.elements[pat->content.Set.ptr_s]); |
| len = strlen(buf); |
| buf += len; |
| buflen -= len; |
| } |
| break; |
| case UPTCharRange: |
| if(buflen) { |
| *buf++ = pat->content.CharRange.ptr_c; |
| *buf = '\0'; |
| buflen--; |
| } |
| break; |
| case UPTNumRange: |
| msnprintf(buf, buflen, "%0*lu", |
| pat->content.NumRange.padlength, |
| pat->content.NumRange.ptr_n); |
| len = strlen(buf); |
| buf += len; |
| buflen -= len; |
| break; |
| default: |
| printf("internal error: invalid pattern type (%d)\n", (int)pat->type); |
| return CURLE_FAILED_INIT; |
| } |
| } |
| |
| *globbed = strdup(glob->glob_buffer); |
| if(!*globbed) |
| return CURLE_OUT_OF_MEMORY; |
| |
| return CURLE_OK; |
| } |
| |
| #define MAX_OUTPUT_GLOB_LENGTH (10*1024) |
| |
| CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob) |
| { |
| char numbuf[18]; |
| char *appendthis = (char *)""; |
| size_t appendlen = 0; |
| struct curlx_dynbuf dyn; |
| |
| *result = NULL; |
| |
| /* We cannot use the glob_buffer for storage since the filename may be |
| * longer than the URL we use. |
| */ |
| curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH); |
| |
| while(*filename) { |
| if(*filename == '#' && ISDIGIT(filename[1])) { |
| char *ptr = filename; |
| unsigned long num = strtoul(&filename[1], &filename, 10); |
| struct URLPattern *pat = NULL; |
| |
| if(num && (num < glob->size)) { |
| unsigned long i; |
| num--; /* make it zero based */ |
| /* find the correct glob entry */ |
| for(i = 0; i<glob->size; i++) { |
| if(glob->pattern[i].globindex == (int)num) { |
| pat = &glob->pattern[i]; |
| break; |
| } |
| } |
| } |
| |
| if(pat) { |
| switch(pat->type) { |
| case UPTSet: |
| if(pat->content.Set.elements) { |
| appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; |
| appendlen = |
| strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); |
| } |
| break; |
| case UPTCharRange: |
| numbuf[0] = pat->content.CharRange.ptr_c; |
| numbuf[1] = 0; |
| appendthis = numbuf; |
| appendlen = 1; |
| break; |
| case UPTNumRange: |
| msnprintf(numbuf, sizeof(numbuf), "%0*lu", |
| pat->content.NumRange.padlength, |
| pat->content.NumRange.ptr_n); |
| appendthis = numbuf; |
| appendlen = strlen(numbuf); |
| break; |
| default: |
| fprintf(stderr, "internal error: invalid pattern type (%d)\n", |
| (int)pat->type); |
| curlx_dyn_free(&dyn); |
| return CURLE_FAILED_INIT; |
| } |
| } |
| else { |
| /* #[num] out of range, use the #[num] in the output */ |
| filename = ptr; |
| appendthis = filename++; |
| appendlen = 1; |
| } |
| } |
| else { |
| appendthis = filename++; |
| appendlen = 1; |
| } |
| if(curlx_dyn_addn(&dyn, appendthis, appendlen)) |
| return CURLE_OUT_OF_MEMORY; |
| } |
| |
| if(curlx_dyn_addn(&dyn, "", 0)) |
| return CURLE_OUT_OF_MEMORY; |
| |
| #if defined(MSDOS) || defined(WIN32) |
| { |
| char *sanitized; |
| SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn), |
| (SANITIZE_ALLOW_PATH | |
| SANITIZE_ALLOW_RESERVED)); |
| curlx_dyn_free(&dyn); |
| if(sc) |
| return CURLE_URL_MALFORMAT; |
| *result = sanitized; |
| return CURLE_OK; |
| } |
| #else |
| *result = curlx_dyn_ptr(&dyn); |
| return CURLE_OK; |
| #endif /* MSDOS || WIN32 */ |
| } |