13
0
livetrax/libs/aaf/URIParser.c

1039 lines
24 KiB
C
Raw Normal View History

/*
2024-03-10 16:55:40 -04:00
* Copyright (C) 2023-2024 Adrien Gesta-Fline
*
* This file is part of libAAF.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
2024-03-10 16:55:40 -04:00
#include <assert.h>
#include <ctype.h>
2024-03-11 13:52:45 -04:00
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif
#include "aaf/URIParser.h"
2024-03-10 16:55:40 -04:00
#include "aaf/utils.h"
#define debug(...) \
2024-03-10 16:55:40 -04:00
AAF_LOG (log, NULL, DEBUG_SRC_ID_AAF_IFACE, VERB_DEBUG, __VA_ARGS__)
#define warning(...) \
2024-03-10 16:55:40 -04:00
AAF_LOG (log, NULL, DEBUG_SRC_ID_AAF_IFACE, VERB_WARNING, __VA_ARGS__)
#define error(...) \
2024-03-10 16:55:40 -04:00
AAF_LOG (log, NULL, DEBUG_SRC_ID_AAF_IFACE, VERB_ERROR, __VA_ARGS__)
#define IS_LOWALPHA(c) \
((c >= 'a') && (c <= 'z'))
#define IS_UPALPHA(c) \
((c >= 'A') && (c <= 'Z'))
#define IS_ALPHA(c) \
(IS_LOWALPHA (c) || IS_UPALPHA (c))
#define IS_DIGIT(c) \
(c >= '0' && c <= '9')
#define IS_ALPHANUM(c) \
(IS_ALPHA (c) || IS_DIGIT (c))
#define IS_HEX(c) \
(IS_DIGIT (c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/*
* RFC 2396
* https://datatracker.ietf.org/doc/html/rfc2396#section-2.3
*/
#define IS_MARK(c) \
(c == '-' || c == '_' || c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
#define IS_UNRESERVED(c) \
(IS_ALPHANUM (c) || IS_MARK (c))
#define IS_ENCODED(p) \
((*p == '%') && IS_HEX (*(p + 1)) && IS_HEX (*(p + 2)))
#define SCHEME_SAFE_CHAR(c) \
(IS_ALPHANUM (c) || c == '+' || c == '.' || c == '-')
#define USERINFO_SAFE_CHAR(p) \
(IS_ALPHANUM (*p) || IS_ENCODED (p) || *p == ';' || *p == ':' || *p == '&' || *p == '=' || *p == '+' || *p == '$' || *p == ',')
#define WINDOWS_DRIVE_LETTER(p) \
(IS_ALPHA (*p) && (*(p + 1) == ':' || *(p + 1) == '|') && *(p + 2) == '/')
#define SCHEME_ALLOW_QUERY(uri) \
(uri->scheme_t != URI_SCHEME_T_FILE && \
!(uri->opts & URI_OPT_IGNORE_QUERY))
#define SCHEME_ALLOW_FRAGMENT(uri) \
(uri->scheme_t != URI_SCHEME_T_FILE && \
!(uri->opts & URI_OPT_IGNORE_FRAGMENT))
2024-03-10 16:55:40 -04:00
#define URI_SET_STR(str, start, end) \
\
str = malloc (sizeof (char) * (uint32_t) ((end - start) + 1)); \
\
if (!str) { \
error ("Out of memory"); \
goto err; \
} \
\
snprintf (str, (uint32_t) (end - start) + 1, "%s", start);
static char*
uriDecodeString (char* src, char* dst);
static int
uriIsIPv4 (const char* s, size_t size, char** err);
static int
uriIsIPv6 (const char* s, size_t size, char** err);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_scheme (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_authority (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_userinfo (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_hostname (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_path (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_query (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static int
2024-03-10 16:55:40 -04:00
_uri_parse_fragment (struct uri* uri, const char** pos, const char* end, struct aafLog* log);
static void
_uri_scheme2schemeType (struct uri* uri);
2024-03-10 16:55:40 -04:00
static char*
uriDecodeString (char* src, char* dst)
{
if (src == NULL) {
return NULL;
}
if (dst == NULL) {
dst = src;
}
2024-03-10 16:55:40 -04:00
char* end = src + strlen (src);
while (*src) {
if (*src == '%' && src + 2 < end && IS_HEX (*(src + 1)) && IS_HEX (*(src + 2))) {
char d1 = *(src + 1);
char d2 = *(src + 2);
2024-03-10 16:55:40 -04:00
int digit = 0;
2024-03-10 16:55:40 -04:00
digit = (d1 >= 'A' ? ((d1 & 0xdf) - 'A') + 10 : (d1 - '0'));
digit <<= 4;
digit += (d2 >= 'A' ? ((d2 & 0xdf) - 'A') + 10 : (d2 - '0'));
2024-03-10 16:55:40 -04:00
assert (digit > CHAR_MIN && digit < UCHAR_MAX);
2024-03-10 16:55:40 -04:00
*dst = (char)digit;
2024-03-10 16:55:40 -04:00
src += 3;
dst++;
} else {
2024-03-10 16:55:40 -04:00
*dst = *src;
src++;
dst++;
}
}
2024-03-10 16:55:40 -04:00
*dst = 0x00;
return dst;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_scheme (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
const char* p = *pos;
while (p < end && *p != ':') {
if (!SCHEME_SAFE_CHAR (*p)) {
error ("uri scheme contains invalid character : '%c' (0x%02x)", *p, *p);
goto err;
}
p++;
}
if (*pos == p) {
error ("uri is missing scheme");
goto err;
}
URI_SET_STR (uri->scheme, *pos, p);
/*
* RFC 3986 - Generic
* https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
*
* « Although schemes are case- insensitive, the canonical form is lowercase
* and documents that specify schemes must do so with lowercase letters.
* An implementation should accept uppercase letters as equivalent to lowercase
* in scheme names (e.g., allow "HTTP" as well as "http") for the sake of
* robustness but should only produce lowercase scheme names for consistency.»
*/
char* pp = uri->scheme;
while (*pp) {
2024-03-10 16:55:40 -04:00
int charint = tolower (*pp);
assert (charint > CHAR_MIN && charint < CHAR_MAX);
*pp = (char)charint;
pp++;
}
_uri_scheme2schemeType (uri);
*pos = ++p; /* Skips ':' */
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_authority (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
/*
* RFC 3986 - Uniform Resource Identifier (URI): Generic Syntax
* https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
*
* « Many URI schemes include a hierarchical element for a naming
* authority so that governance of the name space defined by the
* remainder of the URI is delegated to that authority (which may, in
* turn, delegate it further). The generic syntax provides a common
* means for distinguishing an authority based on a registered name or
* server address, along with optional port and user information.
*
* The authority component is preceded by a double slash ("//") and is
* terminated by the next slash ("/"), question mark ("?"), or number
* sign ("#") character, or by the end of the URI.
*
* authority = [ userinfo "@" ] host [ ":" port ]
*
* URI producers and normalizers should omit the ":" delimiter that
* separates host from port if the port component is empty. Some
* schemes do not allow the userinfo and/or port subcomponents.
*
* If a URI contains an authority component, then the path component
* must either be empty or begin with a slash ("/") character. Non-
* validating parsers (those that merely separate a URI reference into
* its major components) will often ignore the subcomponent structure of
* authority, treating it as an opaque string from the double-slash to
* the first terminating delimiter, until such time as the URI is
* dereferenced.»
*/
if (*(*pos) != '/' ||
*((*pos) + 1) != '/') {
/* uri has no authority */
if (uri->scheme_t == URI_SCHEME_T_FILE) {
uri->flags |= URI_T_LOCALHOST;
}
// uri->flags |= URI_T_LOCALHOST;
// uri->flags |= URI_T_HOST_EMPTY;
return 0;
}
*pos += 2;
const char* p = *pos;
while (p < end &&
*p != '/' &&
(!SCHEME_ALLOW_QUERY (uri) || *p != '?') &&
(!SCHEME_ALLOW_FRAGMENT (uri) || *p != '#')) {
p++;
}
URI_SET_STR (uri->authority, *pos, p);
if (*uri->authority == 0x00) {
uri->flags |= URI_T_LOCALHOST;
/* TODO: return 0 ? */
}
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_userinfo (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
int hasUserinfo = 0;
int userinfoIllegalCharacters = 0;
const char* p = *pos;
while (p < end &&
/* end of authority */
*p != '/' &&
(!SCHEME_ALLOW_QUERY (uri) || *p != '?') &&
(!SCHEME_ALLOW_FRAGMENT (uri) || *p != '#')) {
if (*p == '@') {
hasUserinfo = 1;
break;
}
if (!USERINFO_SAFE_CHAR (p)) {
userinfoIllegalCharacters++;
}
p++;
}
if (!hasUserinfo) {
return 0;
}
if (userinfoIllegalCharacters > 0) {
error ("uri userinfo contains %i invalid char%s", userinfoIllegalCharacters, (userinfoIllegalCharacters > 1) ? "s" : "");
goto err;
}
/* user / pass */
URI_SET_STR (uri->userinfo, *pos, p);
*pos = p + 1; // skips '@'
const char* subpos = NULL;
p = uri->userinfo;
while (1) {
if (!*p) {
if (subpos) {
URI_SET_STR (uri->pass, subpos, p);
} else {
URI_SET_STR (uri->user, uri->userinfo, p);
}
break;
} else if (*p == ':') {
URI_SET_STR (uri->user, uri->userinfo, p);
subpos = p + 1;
}
p++;
}
if (uri->opts & URI_OPT_DECODE_USERINFO && uri->userinfo) {
uriDecodeString (uri->userinfo, NULL);
}
if (uri->opts & URI_OPT_DECODE_USERPASS) {
if (uri->user)
uriDecodeString (uri->user, NULL);
if (uri->pass)
uriDecodeString (uri->pass, NULL);
}
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_hostname (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
const char* p = *pos;
if (**pos == '[') {
/*
* IPv6 - RFC 2732
* https://datatracker.ietf.org/doc/html/rfc2732
*/
(*pos)++; // skips '['
while (p < end &&
*p != ']') {
p++;
}
URI_SET_STR (uri->host, *pos, p);
char* iperr = NULL;
int rc = 0;
if ((rc = uriIsIPv6 (uri->host, strlen (uri->host), &iperr))) {
uri->flags |= URI_T_HOST_IPV6;
if (rc == 2) {
uri->flags |= URI_T_LOCALHOST;
}
} else {
error ("URI IPv6 Parser error : %s\n", iperr);
free (iperr);
goto err;
}
p++; // skips ']'
} else if ((*p == '.' || *p == '?') && (*(p + 1) == '/')) {
/* windows "//./" and "//?/" guard */
uri->flags |= URI_T_LOCALHOST;
return 0;
} else {
/*
* All other : IPv4, server name, local path
*/
while (p < end &&
*p != '/' && // if URI contains a path
*p != ':' && // if URI has an explicit port
(!SCHEME_ALLOW_QUERY (uri) || *p != '?') &&
(!SCHEME_ALLOW_FRAGMENT (uri) || *p != '#')) {
p++;
}
2024-03-10 16:55:40 -04:00
// debug( L" >>> %.*s", (int)(p-*pos), p );
URI_SET_STR (uri->host, *pos, p);
}
// if ( !(uri->flags & URI_T_HOST_IPV6 || uri->flags & URI_T_HOST_EMPTY) ) {
if (!(uri->flags & URI_T_HOST_IPV6) && uri->host != NULL && *uri->host != 0x00) {
if (uriIsIPv4 (uri->host, strlen (uri->host), NULL)) {
2024-03-10 16:55:40 -04:00
uri->flags &= ~(unsigned)URI_T_HOST_MASK;
uri->flags |= URI_T_HOST_IPV4;
if (strcmp (uri->host, "127.0.0.1") == 0) {
uri->flags |= URI_T_LOCALHOST;
}
} else if (strcmp (uri->host, "localhost") == 0) {
uri->flags |= URI_T_LOCALHOST;
} else {
uri->flags |= URI_T_HOST_REGNAME;
}
if (uri->opts & URI_OPT_DECODE_HOSTNAME) {
uriDecodeString (uri->host, NULL);
}
}
// else if ( uri->host == NULL ) {
// if ( uri->scheme_t == URI_SCHEME_T_FILE ) {
// uri->flags |= URI_T_LOCALHOST;
// }
// }
if (*p == ':') {
/* port */
*pos = ++p;
while (p < end &&
*p != '/' &&
(!SCHEME_ALLOW_QUERY (uri) || *p != '?') &&
(!SCHEME_ALLOW_FRAGMENT (uri) || *p != '#')) {
if (!IS_DIGIT (*p)) {
error ("URI port contains non-digit char : %c (0x%02x).\n", *p, *p);
goto err;
}
p++;
}
uri->port = atoi (*pos);
}
*pos = p; // keeps next char, first path '/'
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_path (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
int winDrive = 0;
/* sanitize start of path : ignores all slashes (after already parsed '//' identifying start of authority) */
while (*(*pos + 1) == '/') {
(*pos)++;
}
if (*(*pos) == '/' && WINDOWS_DRIVE_LETTER (((*pos) + 1))) {
/*
* Windows Drive (c: / c|) - RFC 8089
* https://datatracker.ietf.org/doc/html/rfc8089#appendix-E.2.2
*/
(*pos)++; /* moves forward last slash before driver letter, so path starts at the letter with no slash before. */
winDrive = 1;
}
const char* p = *pos;
while (p < end &&
(!SCHEME_ALLOW_QUERY (uri) || *p != '?') &&
(!SCHEME_ALLOW_FRAGMENT (uri) || *p != '#')) {
p++;
}
2024-03-10 16:55:40 -04:00
// debug( L" >>> (%i) %.*s", (int)(p-*pos), (int)(p-*pos), p );
URI_SET_STR (uri->path, *pos, p);
if (winDrive) {
if (uri->path[1] == '|') {
/*
* https://datatracker.ietf.org/doc/html/rfc8089#appendix-E.2.2
* « To update such an old URI, replace the vertical line "|" with a colon ":" »
*/
uri->path[1] = ':';
}
}
if (uri->opts & URI_OPT_DECODE_PATH) {
uriDecodeString (uri->path, NULL);
}
*pos = p;
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_query (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
const char* p = *pos;
if (!(uri->opts & URI_OPT_IGNORE_QUERY) && **pos == '?') {
while (p < end && *p != '#') {
p++;
}
(*pos)++; // skips '?'
URI_SET_STR (uri->query, *pos, p);
if (uri->opts & URI_OPT_DECODE_QUERY) {
uriDecodeString (uri->query, NULL);
}
*pos = p;
}
return 1;
err:
return -1;
}
static int
2024-03-10 16:55:40 -04:00
_uri_parse_fragment (struct uri* uri, const char** pos, const char* end, struct aafLog* log)
{
/*
* https://datatracker.ietf.org/doc/html/draft-yevstifeyev-ftp-uri-scheme#section-3.2.4.2
* « ... fragment identifier are allowed in any URI.
*
* The number sign ("#") characters (ASCII character 0x23), if used for
* the reason other than to delimit the fragment identifier SHALL be
* percent-encoded. »
*
* However, we've seen filenames in 'file' scheme with non encoded '#'.
* Plus, it seems impossible for a client to use fragments in a 'file'
* scheme URI. So the SCHEME_ALLOW_FRAGMENT() macro will make the parser
* treat '#' chars as a normal character, only for 'file' scheme.
*/
const char* p = *pos;
if (!(uri->opts & URI_OPT_IGNORE_FRAGMENT) && **pos == '#') {
while (p < end) {
p++;
}
(*pos)++; // skips '#'
URI_SET_STR (uri->fragment, *pos, p);
if (uri->opts & URI_OPT_DECODE_FRAGMENT) {
uriDecodeString (uri->fragment, NULL);
}
*pos = ++p; // skips '#'
}
return 1;
err:
return -1;
}
struct uri*
2024-03-10 16:55:40 -04:00
laaf_uri_parse (const char* uristr, enum uri_option optflags, struct aafLog* log)
{
if (uristr == NULL) {
return NULL;
}
struct uri* uri = calloc (1, sizeof (struct uri));
2024-03-10 16:55:40 -04:00
if (!uri) {
error ("Out of memory");
return NULL;
}
size_t urilen = strlen (uristr);
if (urilen >= MAX_URI_LENGTH) {
error ("uri is too long");
goto err;
}
uri->opts = optflags;
const char* pos = uristr;
const char* end = pos + urilen;
2024-03-10 16:55:40 -04:00
_uri_parse_scheme (uri, &pos, end, log);
2024-03-10 16:55:40 -04:00
if (_uri_parse_authority (uri, &pos, end, log)) {
_uri_parse_userinfo (uri, &pos, end, log);
_uri_parse_hostname (uri, &pos, end, log);
}
2024-03-10 16:55:40 -04:00
_uri_parse_path (uri, &pos, end, log);
if (SCHEME_ALLOW_QUERY (uri)) {
2024-03-10 16:55:40 -04:00
_uri_parse_query (uri, &pos, end, log);
}
if (SCHEME_ALLOW_FRAGMENT (uri)) {
2024-03-10 16:55:40 -04:00
_uri_parse_fragment (uri, &pos, end, log);
}
goto end;
err:
2024-03-10 16:55:40 -04:00
laaf_uri_free (uri);
uri = NULL;
end:
return uri;
}
void
2024-03-10 16:55:40 -04:00
laaf_uri_free (struct uri* uri)
{
2024-03-10 16:55:40 -04:00
if (!uri) {
return;
}
2024-03-10 16:55:40 -04:00
free (uri->scheme);
free (uri->userinfo);
free (uri->authority);
free (uri->user);
free (uri->pass);
free (uri->host);
free (uri->path);
free (uri->query);
free (uri->fragment);
free (uri);
}
2024-03-10 16:55:40 -04:00
static int
uriIsIPv4 (const char* s, size_t size, char** err)
{
int octets = 0;
const char* currentOctetStart = s;
char prev = 0;
2024-03-10 16:55:40 -04:00
for (size_t i = 0; i <= size; i++) {
if (prev == 0) {
if (IS_DIGIT (*(s + i))) {
currentOctetStart = (s + i);
prev = 'd';
continue;
}
if (*(s + i) == '.') {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : can't start with a single '.'");
}
return 0;
}
}
if (prev == 'p') {
if (IS_DIGIT (*(s + i))) {
currentOctetStart = (s + i);
prev = 'd';
continue;
}
if (*(s + i) == '.') {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : can't have successive '.'");
}
return 0;
}
}
if (prev == 'd') {
if (IS_DIGIT (*(s + i))) {
prev = 'd';
continue;
}
if (i == size || *(s + i) == '.') { // period
int octet = atoi (currentOctetStart);
if (octet > 255) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : octet %i is too high : %.*s", (octets), (int)((s + i) - currentOctetStart), currentOctetStart);
}
return 0;
}
if (i + 1 == size) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : can't end with a single '.'");
}
return 0;
}
prev = 'p';
octets++;
continue;
}
}
if (i == size) {
break;
}
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : illegal char '%c' (0x%02x)", *(s + i), *(s + i));
}
return 0;
}
if (octets > 4) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : too many octets");
}
return 0;
}
if (octets < 4) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "IPV4 parser error : not enough octets");
}
return 0;
}
return 1;
}
2024-03-10 16:55:40 -04:00
static int
uriIsIPv6 (const char* s, size_t size, char** err)
{
int segmentCount = 0;
int emptySegmentCount = 0;
int curSegmentLength = 0;
int ipv4portion = 0;
int loopback = 0;
const char* curSegmentStart = s;
char prev = 0;
2024-03-10 16:55:40 -04:00
for (size_t i = 0; i <= size; i++) {
if (prev == 0) {
if (IS_HEX (*(s + i))) {
segmentCount++;
curSegmentStart = s + i;
curSegmentLength++;
prev = 'h'; // hex
if (loopback >= 0) {
if (!IS_DIGIT (*(s + i))) {
loopback = -1;
} else {
loopback += (*(s + i) - '0'); //atoi(*(s+i));
}
}
continue;
}
if (*(s + i) == ':' && *(s + (i + 1)) == ':') {
emptySegmentCount++;
prev = 'e', // empty
i++;
continue;
}
if (*(s + i) == ':') {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "can't start with a single ':'");
}
return 0;
}
}
if (prev == 'h') { /* hex */
if (IS_HEX (*(s + i))) {
curSegmentLength++;
if (loopback >= 0) {
if (!IS_DIGIT (*(s + i))) {
loopback = -1;
} else {
loopback += (*(s + i) - '0');
}
}
continue;
}
if (*(s + i) == '.') { /* period */
int octet = atoi (curSegmentStart);
if (octet > 255) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "ipv4 portion octet %i is too high : %.*s", (ipv4portion), curSegmentLength, curSegmentStart);
}
return 0;
}
2024-03-10 16:55:40 -04:00
// debug( L"%i", octet );
prev = 'p';
ipv4portion++;
continue;
}
if (i == size || *(s + i) == ':') {
if (curSegmentLength > 4) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "segment %i is too long : %.*s", (segmentCount - 1), curSegmentLength, curSegmentStart);
}
return 0;
}
/* here we can parse segment */
curSegmentStart = NULL;
curSegmentLength = 0;
if (i < size && *(s + (i + 1)) == ':') {
emptySegmentCount++;
prev = 'e', /* empty "::" */
i++;
} else if (i + 1 == size) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "can't end with a single ':'");
}
return 0;
} else {
prev = 'c'; /* colon ":" */
}
continue;
}
}
if (prev == 'e' || prev == 'c') { /* empty or colon */
if (IS_HEX (*(s + i))) {
segmentCount++;
curSegmentStart = s + i;
curSegmentLength++;
prev = 'h'; /* hex */
if (loopback >= 0) {
if (!IS_DIGIT (*(s + i))) {
loopback = -1;
} else {
loopback += (*(s + i) - '0');
}
}
continue;
}
if (*(s + i) == ':') {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "can't have more than two successive ':'");
}
return 0;
}
}
if (prev == 'p') {
if (IS_DIGIT (*(s + i))) {
curSegmentStart = s + i;
prev = 'd';
continue;
}
if (*(s + i) == '.') {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "can't have successive '.'");
}
return 0;
}
}
if (prev == 'd') {
2024-03-10 16:55:40 -04:00
if (IS_DIGIT (*(s + i)) && *(s + i + 1) != '\0' && i + 1 != size) {
prev = 'd';
continue;
}
if (i == size || *(s + i) == '.') { /* period */
int octet = atoi (curSegmentStart);
if (octet > 255) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "ipv4 portion octet %i is too high : %.*s", (ipv4portion), curSegmentLength, curSegmentStart);
}
return 0;
}
2024-03-10 16:55:40 -04:00
// debug( L"%i", octet );
if (i + 1 == size) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "can't end with a single '.'");
}
return 0;
}
prev = 'p';
ipv4portion++;
continue;
}
}
if (i == size) {
break;
}
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "illegal char '%c' (0x%02x)", *(s + i), *(s + i));
}
return 0;
}
2024-03-10 16:55:40 -04:00
// debug( L"segments : %i", segmentCount );
// debug( L"empty segments : %i", emptySegmentCount );
// debug( L"ipv4portion : %i", ipv4portion );
if (ipv4portion > 4) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "too many octets in ipv4 portion : %i", ipv4portion);
}
return 0;
}
if (ipv4portion > 0 && ipv4portion < 4) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "not enough octets in ipv4 portion : %i", ipv4portion);
}
return 0;
}
if (emptySegmentCount + (segmentCount / 2) + ipv4portion > 8) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "too many segments");
}
return 0;
}
if (emptySegmentCount == 0 && (((ipv4portion / 2) + segmentCount) < 8)) {
if (err) {
2024-03-10 16:55:40 -04:00
laaf_util_snprintf_realloc (err, NULL, 0, "not enough segments");
}
return 0;
}
2024-03-10 16:55:40 -04:00
// debug( L"LOCALHOST >>>>>>> %i", loopback );
/*
* 1: valid ipv6 address
* 2: valid ipv6 address and is loopback
*/
return (loopback == 1) ? 2 : 1;
}
static void
_uri_scheme2schemeType (struct uri* uri)
{
if (strcmp (uri->scheme, "afp") == 0) {
uri->scheme_t = URI_SCHEME_T_AFP;
} else if (strcmp (uri->scheme, "cifs") == 0) {
uri->scheme_t = URI_SCHEME_T_CIFS;
} else if (strcmp (uri->scheme, "data") == 0) {
uri->scheme_t = URI_SCHEME_T_DATA;
} else if (strcmp (uri->scheme, "dns") == 0) {
uri->scheme_t = URI_SCHEME_T_DNS;
} else if (strcmp (uri->scheme, "file") == 0) {
uri->scheme_t = URI_SCHEME_T_FILE;
} else if (strcmp (uri->scheme, "ftp") == 0) {
uri->scheme_t = URI_SCHEME_T_FTP;
} else if (strcmp (uri->scheme, "http") == 0) {
uri->scheme_t = URI_SCHEME_T_HTTP;
} else if (strcmp (uri->scheme, "https") == 0) {
uri->scheme_t = URI_SCHEME_T_HTTPS;
} else if (strcmp (uri->scheme, "imap") == 0) {
uri->scheme_t = URI_SCHEME_T_IMAP;
} else if (strcmp (uri->scheme, "irc") == 0) {
uri->scheme_t = URI_SCHEME_T_IRC;
} else if (strcmp (uri->scheme, "mailto") == 0) {
uri->scheme_t = URI_SCHEME_T_MAILTO;
} else if (strcmp (uri->scheme, "nfs") == 0) {
uri->scheme_t = URI_SCHEME_T_NFS;
} else if (strcmp (uri->scheme, "pop") == 0) {
uri->scheme_t = URI_SCHEME_T_POP;
} else if (strcmp (uri->scheme, "rtsp") == 0) {
uri->scheme_t = URI_SCHEME_T_RTSP;
} else if (strcmp (uri->scheme, "sftp") == 0) {
uri->scheme_t = URI_SCHEME_T_SFTP;
} else if (strcmp (uri->scheme, "sip") == 0) {
uri->scheme_t = URI_SCHEME_T_SIP;
} else if (strcmp (uri->scheme, "smb") == 0) {
uri->scheme_t = URI_SCHEME_T_SMB;
} else if (strcmp (uri->scheme, "ssh") == 0) {
uri->scheme_t = URI_SCHEME_T_SSH;
} else if (strcmp (uri->scheme, "tel") == 0) {
uri->scheme_t = URI_SCHEME_T_TEL;
} else if (strcmp (uri->scheme, "telnet") == 0) {
uri->scheme_t = URI_SCHEME_T_TELNET;
} else {
uri->scheme_t = URI_SCHEME_T_UNKNOWN;
}
}