Skip to content

Commit

Permalink
Change member ownership (#14)
Browse files Browse the repository at this point in the history
* get rid of useless multiplication by sizeof(char), because it is 1. ALWAYS!

* use 'const size_t' for length, get rid of useless casts. initialize variables on definition, if possible.

* free() works fine on NULL, so no separate NULL check is necessary.

* fix typos.

* add "-g" to Makefile.

* change interface: make members const, add documentation for the public functions.

* rewrite url.c to use simpler & correct parser, less memory allocations etc.

* add generator program for char category table.

* update comments, mark old functions as deprecated

* update .gitignore

* fix dependencies in Makefile

* add category IPv6Char

* progress to new parser implementation

* simplify parsing of hostname & port

* implement URL parser completely. Now it's time for testing. :-D

* fix incorrect handling of / in URLs. Paths are not strictly RFC-compliant, yet.

* re-implement all the API functions (but might behave differently now)

* add a hack to keep the '/' at the beginning of the path.

* fix test: '#' introduces the fragment, but itself is not part of it.

* add handling of %xx encoding. (only for path, yet)

* call decode_percent() also for the URL fragment, and test it.

* add new char category for query keys and values

* rever... QueryKey and QueryValue are not necessary anymore.

* implement %-decoding of key-value pairs in query string.

* remove debug output.

* add more bizarre key-value pairs in test URL

* add test for keys without value -> Oh, it is not parsed as expected!

* keys without value will return empty string "", not NULL.

---------

Co-authored-by: roker <[email protected]>
Co-authored-by: RokerHRO <[email protected]>
  • Loading branch information
3 people authored Jun 14, 2024
1 parent 2d0cca2 commit 752635e
Show file tree
Hide file tree
Showing 7 changed files with 509 additions and 348 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
url-test
*~
*.o
21 changes: 16 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
.PHONY: all clean url-test

all: clean test
all: url-test

clean:
rm -rf url-test
rm -rvf url-test *.o url_char_category_table.h

test: test.c
$(CC) -std=c99 -Wall -I. url.c $< -o url-test
%.o: %.c
$(CC) -g -std=c99 -Wall -c -o $@ $^

url.c: url.h url_char_category.h url_char_category_table.h

url_char_category_table.h: gen_char_category_table
./gen_char_category_table > url_char_category_table.h

gen_char_category_table: gen_char_category_table.o
$(CC) -g -std=c99 -Wall -o $@ $^

url-test: test.o url.o
$(CC) -g -std=c99 -Wall -I. -o $@ $^
./url-test

.PHONY: all clean test
77 changes: 77 additions & 0 deletions gen_char_category_table.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Generates the array 'char_cat', which is #included and used in url.c

#include <stdio.h>

#include "url_char_category.h"

// Working table that fill() populates and print_table() dumps:
// one set of category flags per possible byte value (0..255).
// As a file-scope object it starts out zeroed, i.e. no category is set.
unsigned char_cat[256];

// Character sets used to compose the categories (terminology of RFC 3986).

// All letters and digits: digits first, then lower case, then upper case.
const char *const alnum =
    "0123456789" "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// The punctuation characters that are "unreserved" (letters and digits are
// kept separately in 'alnum').
const char *const unreserved = "-._~";

// The "sub-delims" characters.
const char *const subdelim = "!$&'()*+,;=";

// Adds the flag bits of 'value' to the char_cat[] entry of every character
// that occurs in one of the two given NUL-terminated strings.
//
// value   : flag bit(s) to set; they are OR-ed in, so bits that are already
//           set in an entry are kept.
// alnum   : first set of characters (may be the empty string "").
// special : second set of characters (may be the empty string "").
//
// Neither string may be NULL, because both are dereferenced unconditionally.
void fill(unsigned value, const char* alnum, const char* special)
{
    const char* const sets[2] = { alnum, special };

    for(unsigned s=0; s<2; ++s)
    {
        for(const char* p = sets[s]; *p != '\0'; ++p)
        {
            // Go through 'unsigned char' so that a char with a negative
            // value cannot produce a negative array index.
            const unsigned char index = (unsigned char)*p;
            char_cat[index] |= value;
        }
    }
}


// Writes the content of char_cat[] to stdout as C source code: a definition
// of the array 'static const unsigned short char_cat[256]', laid out as
// 16 rows of 16 hexadecimal values each. Every row ends with a comment that
// names the range of character codes it covers.
void print_table()
{
    // Fixed file header; it contains no conversion specifiers, so it can be
    // written verbatim.
    fputs(
        "// This file is generated by gen_char_category_table. DO NOT EDIT IT BY HAND!\n"
        "\n"
        "static const unsigned short char_cat[256] = {\n"
        "// .0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F\n",
        stdout);

    for(unsigned offset=0; offset<256; ++offset)
    {
        const unsigned row    = offset / 16;
        const unsigned column = offset % 16;

        if(column == 0)
        {
            // every row starts with one extra blank
            putchar(' ');
        }

        // The very last array element must not be followed by a comma.
        const int separator = (offset == 255) ? ' ' : ',';
        printf(" 0x%03x%c", char_cat[offset], separator);

        if(column == 15)
        {
            // end of row: append the covered code range, e.g. "// 40 ... 4F"
            printf(" // %01X0 ... %01XF\n", row, row);
        }
    }

    fputs("};\n\n", stdout);
}


// Builds the character category table in char_cat[] and prints it to stdout.
//
// Each entry of 'rules' corresponds to one fill() call: the category flag
// gets set for all characters of both strings. The entries are applied in
// the order in which they are listed. Categories that need more than two
// character sets are spread over two entries ("part #1" / "part #2").
//
// NOTE(review): the GenDelim and SubDelim entries pass 'alnum' as first set,
// so all letters and digits get those two flags as well, although RFC 3986
// defines gen-delims and sub-delims as punctuation only. Confirm how url.c
// uses these flags before changing it.
int main()
{
    const struct
    {
        unsigned    category;
        const char* first;
        const char* second;
    } rules[] = {
        { Scheme    , alnum       , "+-."                 },
        { Unreserved, alnum       , unreserved            },
        { GenDelim  , alnum       , ":/?#[]@"             },
        { SubDelim  , alnum       , subdelim              },
        { PCharSlash, alnum       , ":@/%"                }, // part #1
        { PCharSlash, unreserved  , subdelim              }, // part #2
        { HexDigit  , "0123456789", "abcdef" "ABCDEF"     },
        { Query     , alnum       , "/?:@%"               }, // part #1
        { Query     , unreserved  , subdelim              }, // part #2
        { Userinfo  , alnum       , ":%"                  }, // part #1
        { Userinfo  , unreserved  , subdelim              }, // part #2
        { IPv6Char  , "0123456789", "abcdef" "ABCDEF" ":" },
    };
    const unsigned rule_count = sizeof(rules) / sizeof(rules[0]);

    for(unsigned r=0; r<rule_count; ++r)
    {
        fill( rules[r].category, rules[r].first, rules[r].second );
    }

    print_table();
    return 0;
}
41 changes: 17 additions & 24 deletions test.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

#include <url.h>
#include "url.h"
#include <assert.h>
#include <string.h>

Expand All @@ -17,7 +17,7 @@ main (void) {
//url_inspect("https://google.com/search?q=github");

char *gh_url = "git://[email protected]:jwerle/url.h.git";
char *url = "http://user:[email protected]:8080/p/a/t/h?query=string#hash";
char *url = "http://user:[email protected]:8080/p/%C3%A5/t/h?qu%C3%ABry=strin%C4%9F&foo=bar=yuk&key%23%26%3D=%25&lol#h%C3%a6sh";

url_data_t *parsed = url_parse(url);
url_data_t *gh_parsed = url_parse(gh_url);
Expand All @@ -28,24 +28,19 @@ main (void) {
url_data_inspect(parsed);
url_data_inspect(gh_parsed);

assert(parsed->href);
assert(parsed->auth);
assert(parsed->whole_url);
assert(parsed->protocol);
assert(parsed->port);
assert(parsed->hostname);
assert(parsed->userinfo);
assert(parsed->host);
assert(parsed->pathname);
assert(parsed->port);
assert(parsed->path);
assert(parsed->hash);
assert(parsed->search);
assert(parsed->query);
assert(parsed->fragment);

assert(gh_parsed->href);
assert(gh_parsed->whole_url);
assert(gh_parsed->protocol);
assert(gh_parsed->userinfo);
assert(gh_parsed->host);
assert(gh_parsed->auth);
assert(gh_parsed->hostname);
assert(gh_parsed->pathname);
assert(gh_parsed->path);

assert(url_is_protocol("http"));
Expand All @@ -57,21 +52,19 @@ main (void) {
assert(url_is_protocol("javascript"));

STRING_ASSERT("http", url_get_protocol(url));
STRING_ASSERT("user:pass", url_get_auth (url));
STRING_ASSERT("subdomain.host.com:8080", url_get_hostname(url));
STRING_ASSERT("subdomain.host.com", url_get_host (url));
STRING_ASSERT("/p/a/t/h", url_get_pathname(url));
STRING_ASSERT("/p/a/t/h?query=string#hash", url_get_path (url));
STRING_ASSERT("?query=string", url_get_search (url));
STRING_ASSERT("query=string", url_get_query (url));
STRING_ASSERT("#hash", url_get_hash (url));
STRING_ASSERT("user:pass", url_get_userinfo(url));
STRING_ASSERT("subdomain.host.com", url_get_hostname(url));
STRING_ASSERT("/p/\xc3\xa5/t/h", url_get_path (url));
assert( strcmp("strin\xc4\x9f", url_get_query_value (parsed, "qu\xc3\xabry"))==0 );
assert( strcmp("bar=yuk", url_get_query_value (parsed, "foo"))==0 );
assert( strcmp("%", url_get_query_value (parsed, "key#&="))==0 );
assert( strcmp("", url_get_query_value (parsed, "lol"))==0 );
STRING_ASSERT("hæsh", url_get_fragment(url));
STRING_ASSERT("8080", url_get_port (url));

STRING_ASSERT("git", url_get_protocol(gh_url));
STRING_ASSERT("github.com", url_get_host (gh_url));
STRING_ASSERT("github.com", url_get_hostname(gh_url));
STRING_ASSERT("git", url_get_auth (gh_url));
STRING_ASSERT("jwerle/url.h.git", url_get_pathname(gh_url));
STRING_ASSERT("git", url_get_userinfo(gh_url));
STRING_ASSERT("jwerle/url.h.git", url_get_path (gh_url));

url_free(parsed);
Expand Down
Loading

0 comments on commit 752635e

Please sign in to comment.