Skip to content

Commit

Permalink
* add hack for utf16 to ascii
Browse files Browse the repository at this point in the history
* remove debug statements
* add invalid tags feedback
  • Loading branch information
ibejohn818 committed Sep 26, 2024
1 parent d9aabe6 commit 01023b7
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 27 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ dist/
spec/
compile_commands.json
examples/bin
.data/
1 change: 1 addition & 0 deletions src/include/id3v2/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ typedef struct {
id3v2_frame_t *frame;
id3v2_frame_encoding_t encoding;
char *text;
size_t size;
} id3v2_frame_text_t;

typedef struct {
Expand Down
13 changes: 13 additions & 0 deletions src/include/id3v2/unicode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef ID3V2_UNICODE_H
#define ID3V2_UNICODE_H

#include <stdio.h>

/**
* FIXME: hack for utf16 strings
*
* Lossy conversion: the first three bytes of buf are skipped, every
* remaining byte equal to 0x00 is dropped, and all other bytes are copied
* to the output in their original order. No real UTF-16 decoding happens.
*
* @param buf       input bytes (the visible callers pass an encoding byte
*                  followed by a two-byte BOM and the UTF-16 text)
* @param buf_size  number of bytes in buf
* @param out       receives a pointer to a buffer allocated on the heap by
*                  this function (ownership presumably passes to the
*                  caller -- confirm against callers)
* @param out_size  receives buf_size / 2
*/
void utf16_to_ascii(const char *buf, size_t buf_size, char **out, size_t *out_size);


#endif // ID3V2_UNICODE_H

3 changes: 2 additions & 1 deletion src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ pkg_search_module(LIBSODIUM OPTIONAL libsodium)


set(SOURCES
# ${id3v2lib_SOURCE_DIR}/src/include/id3v2/types.h
synch.c
helpers.c
unicode.c
id3v2.c
)

Expand Down Expand Up @@ -53,6 +53,7 @@ message("-- CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message("-- CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message("-- CMAKE_SYSTEM: ${CMAKE_SYSTEM}")
message("-- CMAKE_OS_NAME: ${CMAKE_OS_NAME}")
message("-- CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
message("-- HAS_APT: ${HAS_APT}")
message("-- HAS_RPM: ${HAS_RPM}")

Expand Down
72 changes: 53 additions & 19 deletions src/lib/id3v2.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "id3v2.h"
#include "id3v2/synch.h"
#include "id3v2/types.h"
#include "id3v2/unicode.h"

static id3v2_frame_list_t *scan_frames(id3v2_tag_t *t);

Expand All @@ -22,13 +23,15 @@ id3v2_tag_t *id3v2_from_file(const char *file_name) {

if (fread(&header, sizeof(char), ID3_HEADER, fp) != ID3_HEADER) {
puts("unable to read id3 header");
exit(1);
// exit(1);
goto error;
}

// check id3 tag marker
if ((header[0] != 'I') || (header[1] != 'D') || (header[2] != '3')) {
puts("invalid id3 header");
exit(1);
// exit(1);
goto error;
}

// get major version_major byte
Expand All @@ -41,7 +44,7 @@ id3v2_tag_t *id3v2_from_file(const char *file_name) {
// decode the tag size
t->tag_size = synch_decode(int_decode(header, 4, 6));

printf("Tag size: %u \n", t->tag_size);
// fprintf(stderr, "Tag size: %u \n", t->tag_size);

fseek(fp, 10, SEEK_SET);

Expand All @@ -62,6 +65,13 @@ id3v2_tag_t *id3v2_from_file(const char *file_name) {
fclose(fp);

return t;

error:
if (t != NULL) {
id3v2_tag_free(t);
}
fclose(fp);
return NULL;
}

id3v2_tag_t *id3v2_from_buffer(const char *buf) {
Expand Down Expand Up @@ -116,7 +126,7 @@ id3v2_frame_t *parse_frame(id3v2_tag_t *t, size_t *cursor_pos) {

// validate the tag and return NULL for invalid
if (!_id3v2_validate_frame_tag(tag)) {
printf("Invalid frame tag: position: %lu Tag: %s \n", *cursor_pos, tag);
// fprintf(stderr, "Invalid frame tag: position: %lu Tag: %s \n", *cursor_pos, tag);
return NULL;
}

Expand All @@ -130,23 +140,34 @@ id3v2_frame_t *parse_frame(id3v2_tag_t *t, size_t *cursor_pos) {
*cursor_pos += 4;

f->size = int_decode((unsigned char *)t->tag_buffer, 4, *cursor_pos);
// fprintf(stderr, "frame size: %d \n", f->size);
if (t->version_major == 4) {
// version_major 4 gets the synch safe size
f->size = synch_decode(f->size);
fprintf(stderr, "sync safe frame: %d \n", f->size);
}

// move the cursor forward
*cursor_pos += 4;

// get the flags
memcpy(&f->flags, t->tag_buffer + *cursor_pos, 2);

*cursor_pos += 2;

// allocate and copy the frame buffer
f->buffer = calloc(f->size, sizeof(char));
memcpy(f->buffer, t->tag_buffer + *cursor_pos, f->size);
// if (strcmp(tag, "TIT2") == 0) {
// fprintf(stderr, "Found title \n");
// fprintf(stderr, "flags: %x %x\n", f->flags[0], f->flags[1]);
// fprintf(stderr, "first bytes: %x %x\n", (t->tag_buffer + *cursor_pos)[0], (t->tag_buffer + *cursor_pos)[1]);
// for(size_t ii=0; ii < f->size; ii++) {
// fprintf(stderr, "0x%.2x\n", (t->tag_buffer + *cursor_pos)[ii]);
// }
// }

// allocate and copy the frame buffer
// f->buffer = calloc(f->size + 1, sizeof(char));
f->buffer = malloc(sizeof(char) * f->size);
memcpy(f->buffer, t->tag_buffer + (*cursor_pos), (size_t)f->size);

*cursor_pos += f->size;
// printf("func cursor pos: %lu \n", *cursor_pos);

Expand Down Expand Up @@ -205,39 +226,51 @@ id3v2_frame_text_t *id3v2_frame_text(id3v2_frame_t *f) {

// buffer position
size_t cursor = 0;
// size of the null terminator to apply
size_t null_term_size = 1;

if (f->size <= 2) {
t->text = NULL;
t->encoding = LATIN1;
return t;
}

// determine encoding
// TODO: utf byte order marker?
// FIXME: using a hack for utf16, we should make this
// more robust or use a library
switch (f->buffer[cursor]) {
case 0x00:
t->encoding = LATIN1;
break;
case 0x01:
t->encoding = UTF16;
null_term_size = 2;
break;
case 0x02:
t->encoding = UTF16BE;
null_term_size = 2;
break;
case 0x03:
t->encoding = UTF8;
null_term_size = 2;
break;
default:
// printf("Illegal encoding for frame: %s \n", f->tag);
t->encoding = LATIN1;
}
cursor++;

if (f->size > 2) {
printf("text frame size: %u \n", f->size);
t->text = calloc(f->size, sizeof(char));
switch(t->encoding) {
case UTF16:{
char *out = NULL;
size_t out_size = 0;
utf16_to_ascii(f->buffer, f->size, &out, &out_size);
t->encoding = LATIN1;
t->text = out;
t->size = out_size;
// fprintf(stderr, "Output: %s\n", t->text);
break;
}
default: // LATIN1
t->text = malloc(sizeof(char) * f->size);
t->size = f->size;
memcpy(t->text, f->buffer + cursor, f->size);
} else {
t->text = NULL;
break;
}

return t;
Expand Down Expand Up @@ -435,6 +468,7 @@ static void write_frame_header(id3v2_tag_t *t, id3v2_frame_t *f,
void id3v2_tag_write_to_buffer(id3v2_tag_t *t, unsigned char **buffer,
size_t *size) {

// start with the known header size
size_t total = 10;
total += id3v2_tag_total_frame_size(t);

Expand Down
26 changes: 26 additions & 0 deletions src/lib/unicode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "id3v2/unicode.h"
#include <stdlib.h>
#include <string.h>


/**
 * Lossy UTF-16 -> ASCII hack for ID3v2 text payloads.
 *
 * The first three bytes of buf are skipped (the visible callers pass an
 * encoding byte followed by a two-byte BOM). Every remaining byte that is
 * not 0x00 is copied to the output in order; no real UTF-16 decoding is
 * performed, so non-ASCII code units produce garbage bytes.
 *
 * The output always holds buf_size / 2 payload bytes plus one terminating
 * NUL, so it is safe to use as a C string. Input that would produce more
 * than buf_size / 2 non-zero bytes is truncated instead of overflowing
 * the allocation.
 *
 * @param buf       input bytes; may be NULL only when buf_size is 0
 * @param buf_size  number of readable bytes in buf
 * @param out       receives a heap buffer to be released with free(), or
 *                  NULL on allocation failure or invalid input
 * @param out_size  receives buf_size / 2 (payload capacity, excluding the
 *                  terminator), or 0 on failure
 */
void utf16_to_ascii(const char *buf, size_t buf_size, char **out, size_t *out_size) {
  if (out == NULL || out_size == NULL) {
    return;
  }

  // leave the outputs in a well-defined state on every failure path
  *out = NULL;
  *out_size = 0;

  if (buf == NULL && buf_size > 0) {
    return;
  }

  // same reported size as before, plus one extra byte for the terminator
  const size_t capacity = buf_size / 2;
  char *o = calloc(capacity + 1, sizeof *o);
  if (o == NULL) {
    return;
  }

  // output byte index
  size_t cidx = 0;

  // start at 3 to skip the encoding byte and the BOM; stop once the
  // payload capacity is reached so o[capacity] stays a NUL terminator
  for (size_t i = 3; i < buf_size && cidx < capacity; i++) {
    if (buf[i] == 0x00) {
      continue;
    }
    o[cidx] = buf[i];
    cidx++;
  }

  // hand the buffer over to the caller
  *out = o;
  *out_size = capacity;
}
3 changes: 3 additions & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ include_directories(../lib)
add_executable(${PROJECT_NAME} ${SOURCES})

target_link_libraries(${PROJECT_NAME} m id3v2)

add_executable(${PROJECT_NAME}_unicode unicode.c)
target_link_libraries(${PROJECT_NAME}_unicode m id3v2)
20 changes: 13 additions & 7 deletions src/test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void write_id3_buffer(id3v2_tag_t *t, const char *save_to,
fclose(fp);
}

void utf16_to_ascii(const unsigned char *utf16_buf, size_t utf16_size,
void ___utf16_to_ascii(const unsigned char *utf16_buf, size_t utf16_size,
unsigned char **buf, size_t *size) {

// check if there is a BOM, if so we skip
Expand Down Expand Up @@ -160,12 +160,12 @@ int main(int argc, char **argv) {

id3v2_frame_text_t *tt;

id3v2_tag_write_artist(tag, "Super Sublime");
tt = id3v2_tag_artist(tag);
if (tt != NULL) {
printf("Artist: %s \n", tt->text);
id3v2_tag_free_text_frame(tt);
}
// id3v2_tag_write_artist(tag, "Super Sublime");
// tt = id3v2_tag_artist(tag);
// if (tt != NULL) {
// printf("Artist: %s \n", tt->text);
// id3v2_tag_free_text_frame(tt);
// }

tt = id3v2_tag_album(tag);
if (tt != NULL) {
Expand All @@ -179,6 +179,12 @@ int main(int argc, char **argv) {
id3v2_tag_free_text_frame(tt);
}

tt = id3v2_tag_title(tag);
if (tt != NULL) {
printf("Title: %s \n", tt->text);
id3v2_tag_free_text_frame(tt);
}

// update title
id3v2_tag_write_title(tag, "an updated title");
tt = id3v2_tag_title(tag);
Expand Down
26 changes: 26 additions & 0 deletions src/test/unicode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <stdio.h>
#include <stdlib.h>
#include "../include/id3v2/unicode.h"
/*
test a utf16 hack that only removes 0 byte indexes
*/

/*
 * Read-only fixture shaped like an ID3v2 text frame payload:
 *   byte 0     - text encoding marker (0x01)
 *   bytes 1-2  - byte order mark 0xff 0xfe (little-endian)
 *   bytes 3..  - "Look At Me Now (featuring Lil Wayne & Busta Rhymes)"
 *                as two-byte little-endian code units, no terminator
 * The casts on the BOM bytes make the narrowing to char explicit.
 */
static const char tb[] = {
    0x01, (char)0xff, (char)0xfe, 0x4c, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x6b, 0x00, 0x20,
    0x00, 0x41, 0x00, 0x74, 0x00, 0x20, 0x00, 0x4d, 0x00, 0x65, 0x00, 0x20,
    0x00, 0x4e, 0x00, 0x6f, 0x00, 0x77, 0x00, 0x20, 0x00, 0x28, 0x00, 0x66,
    0x00, 0x65, 0x00, 0x61, 0x00, 0x74, 0x00, 0x75, 0x00, 0x72, 0x00, 0x69,
    0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0x4c, 0x00, 0x69, 0x00, 0x6c,
    0x00, 0x20, 0x00, 0x57, 0x00, 0x61, 0x00, 0x79, 0x00, 0x6e, 0x00, 0x65,
    0x00, 0x20, 0x00, 0x26, 0x00, 0x20, 0x00, 0x42, 0x00, 0x75, 0x00, 0x73,
    0x00, 0x74, 0x00, 0x61, 0x00, 0x20, 0x00, 0x52, 0x00, 0x68, 0x00, 0x79,
    0x00, 0x6d, 0x00, 0x65, 0x00, 0x73, 0x00, 0x29, 0x00};

/**
 * Smoke test for utf16_to_ascii(): converts the tb fixture and prints the
 * reported size and the resulting text to stderr.
 *
 * @return EXIT_SUCCESS after printing, EXIT_FAILURE if no output buffer
 *         was produced
 */
int main(int argc, char **argv) {
  // command-line arguments are not used by this test
  (void)argc;
  (void)argv;

  puts("unicode main");

  char *out = NULL;
  size_t out_size = 0;
  // derive the length from the fixture so the two cannot drift apart
  utf16_to_ascii(tb, sizeof tb, &out, &out_size);
  if (out == NULL) {
    // passing NULL to %s is undefined behaviour, so bail out first
    fputs("utf16_to_ascii: conversion failed\n", stderr);
    return EXIT_FAILURE;
  }

  fprintf(stderr, "Size: %zu, Text: %s\n", out_size, out);

  // the converter allocates the buffer; release it here
  free(out);
  return EXIT_SUCCESS;
}

0 comments on commit 01023b7

Please sign in to comment.