-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* get rid of useless multiplication by sizeof(char), because it is 1. ALWAYS! * use 'const size_t' for length, get rid of useless casts. initialize variables on definition, if possible. * free() works fine on NULL, so no separate NULL check is necessary. * fix typos. * add "-g" to Makefile. * change interface: make members const, add documentation for the public functions. * rewrite url.c to use simpler & correct parser, fewer memory allocations etc. * add generator program for char category table. * update comments, mark old functions as deprecated * update .gitignore * fix dependencies in Makefile * add category IPv6Char * progress to new parser implementation * simplify parsing of hostname & port * implement URL parser completely. Now it's time for testing. :-D * fix incorrect handling of / in URLs. Paths are not strictly RFC-compliant, yet. * re-implement all the API functions (but might behave differently now) * add a hack to keep the '/' at the beginning of the path. * fix test: '#' introduces the fragment, but itself is not part of it. * add handling of %xx encoding. (only for path, yet) * call decode_percent() also for the URL fragment, and test it. * add new char category for query keys and values * rever... QueryKey and QueryValue are not necessary anymore. * implement %-decoding of key-value pairs in query string. * remove debug output. * add more bizarre key-value pairs in test URL * add test for keys without value -> Oh, it is not parsed as expected! * keys without value will return empty string "", not NULL. --------- Co-authored-by: roker <[email protected]> Co-authored-by: RokerHRO <[email protected]>
- Loading branch information
1 parent
2d0cca2
commit 752635e
Showing
7 changed files
with
509 additions
and
348 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
url-test | ||
*~ | ||
*.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,22 @@ | ||
.PHONY: all clean url-test | ||
|
||
all: clean test | ||
all: url-test | ||
|
||
clean: | ||
rm -rf url-test | ||
rm -rvf url-test *.o url_char_category_table.h | ||
|
||
test: test.c | ||
$(CC) -std=c99 -Wall -I. url.c $< -o url-test | ||
%.o: %.c | ||
$(CC) -g -std=c99 -Wall -c -o $@ $^ | ||
|
||
url.c: url.h url_char_category.h url_char_category_table.h | ||
|
||
url_char_category_table.h: gen_char_category_table | ||
./gen_char_category_table > url_char_category_table.h | ||
|
||
gen_char_category_table: gen_char_category_table.o | ||
$(CC) -g -std=c99 -Wall -o $@ $^ | ||
|
||
url-test: test.o url.o | ||
$(CC) -g -std=c99 -Wall -I. -o $@ $^ | ||
./url-test | ||
|
||
.PHONY: all clean test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// generates the array 'char_cat' which is #included and used in url.c | |
|
||
#include <stdio.h> | ||
|
||
#include "url_char_category.h" | ||
|
||
// Category bit mask for each of the 256 possible byte values.
// Starts out all-zero (file-scope object); fill() ORs bits into it and
// print_table() writes it out.
unsigned char_cat[256];

// Character sets used as building blocks for the categories
// (names follow RFC 3986).

// Digits followed by lower-case and upper-case ASCII letters.
const char* const alnum =
	"0123456789"
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Non-alphanumeric "unreserved" characters.
const char* const unreserved = "-._~";

// "sub-delims" characters.
const char* const subdelim   = "!$&'()*+,;=";
|
||
void fill(unsigned value, const char* alnum, const char* special) | ||
{ | ||
for(;*alnum;++alnum) | ||
{ | ||
char_cat[ (unsigned char)(*alnum) ] |= value; | ||
} | ||
|
||
for(;*special;++special) | ||
{ | ||
char_cat[ (unsigned char)(*special) ] |= value; | ||
} | ||
} | ||
|
||
|
||
void print_table() | ||
{ | ||
printf( | ||
"// This file is generated by gen_char_category_table. DO NOT EDIT IT BY HAND!\n" | ||
"\n" | ||
"static const unsigned short char_cat[256] = {\n" | ||
"// .0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F\n"); | ||
|
||
for(unsigned y=0; y<16; ++y) | ||
{ | ||
putchar(' '); | ||
for(unsigned x=0; x<16; ++x) | ||
{ | ||
const unsigned offset = y*16+x; | ||
printf(" 0x%03x%c", char_cat[offset], (offset==255 ? ' ' : ',') ); | ||
} | ||
printf(" // %01X0 ... %01XF\n", y, y); | ||
} | ||
printf("};\n\n"); | ||
} | ||
|
||
|
||
int main() | ||
{ | ||
fill( Scheme , alnum, "+-."); | ||
|
||
fill( Unreserved, alnum, unreserved); | ||
|
||
fill( GenDelim , alnum, ":/?#[]@"); | ||
|
||
fill( SubDelim , alnum, subdelim); | ||
|
||
fill( PCharSlash, alnum, ":@/%"); // part #1 | ||
fill( PCharSlash, unreserved, subdelim); // part #2 | ||
|
||
fill( HexDigit, "0123456789", "abcdef" "ABCDEF"); | ||
|
||
fill( Query, alnum, "/?:@%"); // part #1 | ||
fill( Query, unreserved, subdelim); // part #2 | ||
|
||
fill( Userinfo, alnum, ":%"); // part #1 | ||
fill( Userinfo, unreserved, subdelim); // part #2 | ||
|
||
fill( IPv6Char, "0123456789", "abcdef" "ABCDEF" ":"); | ||
|
||
print_table(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
|
||
#include <url.h> | ||
#include "url.h" | ||
#include <assert.h> | ||
#include <string.h> | ||
|
||
|
@@ -17,7 +17,7 @@ main (void) { | |
//url_inspect("https://google.com/search?q=github"); | ||
|
||
char *gh_url = "git://[email protected]:jwerle/url.h.git"; | ||
char *url = "http://user:[email protected]:8080/p/a/t/h?query=string#hash"; | ||
char *url = "http://user:[email protected]:8080/p/%C3%A5/t/h?qu%C3%ABry=strin%C4%9F&foo=bar=yuk&key%23%26%3D=%25&lol#h%C3%a6sh"; | ||
|
||
url_data_t *parsed = url_parse(url); | ||
url_data_t *gh_parsed = url_parse(gh_url); | ||
|
@@ -28,24 +28,19 @@ main (void) { | |
url_data_inspect(parsed); | ||
url_data_inspect(gh_parsed); | ||
|
||
assert(parsed->href); | ||
assert(parsed->auth); | ||
assert(parsed->whole_url); | ||
assert(parsed->protocol); | ||
assert(parsed->port); | ||
assert(parsed->hostname); | ||
assert(parsed->userinfo); | ||
assert(parsed->host); | ||
assert(parsed->pathname); | ||
assert(parsed->port); | ||
assert(parsed->path); | ||
assert(parsed->hash); | ||
assert(parsed->search); | ||
assert(parsed->query); | ||
assert(parsed->fragment); | ||
|
||
assert(gh_parsed->href); | ||
assert(gh_parsed->whole_url); | ||
assert(gh_parsed->protocol); | ||
assert(gh_parsed->userinfo); | ||
assert(gh_parsed->host); | ||
assert(gh_parsed->auth); | ||
assert(gh_parsed->hostname); | ||
assert(gh_parsed->pathname); | ||
assert(gh_parsed->path); | ||
|
||
assert(url_is_protocol("http")); | ||
|
@@ -57,21 +52,19 @@ main (void) { | |
assert(url_is_protocol("javascript")); | ||
|
||
STRING_ASSERT("http", url_get_protocol(url)); | ||
STRING_ASSERT("user:pass", url_get_auth (url)); | ||
STRING_ASSERT("subdomain.host.com:8080", url_get_hostname(url)); | ||
STRING_ASSERT("subdomain.host.com", url_get_host (url)); | ||
STRING_ASSERT("/p/a/t/h", url_get_pathname(url)); | ||
STRING_ASSERT("/p/a/t/h?query=string#hash", url_get_path (url)); | ||
STRING_ASSERT("?query=string", url_get_search (url)); | ||
STRING_ASSERT("query=string", url_get_query (url)); | ||
STRING_ASSERT("#hash", url_get_hash (url)); | ||
STRING_ASSERT("user:pass", url_get_userinfo(url)); | ||
STRING_ASSERT("subdomain.host.com", url_get_hostname(url)); | ||
STRING_ASSERT("/p/\xc3\xa5/t/h", url_get_path (url)); | ||
assert( strcmp("strin\xc4\x9f", url_get_query_value (parsed, "qu\xc3\xabry"))==0 ); | ||
assert( strcmp("bar=yuk", url_get_query_value (parsed, "foo"))==0 ); | ||
assert( strcmp("%", url_get_query_value (parsed, "key#&="))==0 ); | ||
assert( strcmp("", url_get_query_value (parsed, "lol"))==0 ); | ||
STRING_ASSERT("hæsh", url_get_fragment(url)); | ||
STRING_ASSERT("8080", url_get_port (url)); | ||
|
||
STRING_ASSERT("git", url_get_protocol(gh_url)); | ||
STRING_ASSERT("github.com", url_get_host (gh_url)); | ||
STRING_ASSERT("github.com", url_get_hostname(gh_url)); | ||
STRING_ASSERT("git", url_get_auth (gh_url)); | ||
STRING_ASSERT("jwerle/url.h.git", url_get_pathname(gh_url)); | ||
STRING_ASSERT("git", url_get_userinfo(gh_url)); | ||
STRING_ASSERT("jwerle/url.h.git", url_get_path (gh_url)); | ||
|
||
url_free(parsed); | ||
|
Oops, something went wrong.