Fixes for the w64-vulkan release
aagarcia committed Jul 13, 2024
1 parent c7baba6 commit 07d0180
Showing 10 changed files with 59 additions and 31 deletions.
Makefile: 12 changes (10 additions, 2 deletions)
@@ -24,6 +24,16 @@ cflags += -Wno-pedantic
ldlibs += -lggml
ldflags += -L$(GGML_LIB_PATH) -Wl,-rpath,$(GGML_LIB_PATH)

## ggml scheduler is needed for incomplete backends (e.g. Vulkan)
ifndef MLIS_NO_GGML_SCHED
mlimgsynth: cppflags += -DUSE_GGML_SCHED=1
endif

## Flash Attention (not working yet, crashes)
ifdef MLIS_FLASH_ATTENTION
mlimgsynth: cppflags += -DUSE_FLASH_ATTENTION
endif

# png
ifndef MLIS_NO_PNG
mlimgsynth: ldlibs += -lpng
@@ -43,8 +53,6 @@ rng-test: $(common) rng_philox.o rng-test.o

st-util: $(common) ids.o tensorstore.o safetensors.o st-util.o

#mlimgsynth: cppflags += -DUSE_FLASH_ATTENTION
mlimgsynth: cppflags += -DUSE_GGML_SCHED=1
mlimgsynth: $(common) ids.o localtensor.o tensorstore.o safetensors.o \
ggml_extend.o mlblock.o mlblock_nn.o rng_philox.o tae.o vae.o clip.o unet.o \
solvers.o util.o mlimgsynth.o
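
Usage note (not part of the commit, inferred from the conditionals above): the scheduler define is now enabled by default and would be disabled with something like make MLIS_NO_GGML_SCHED=1, while make MLIS_FLASH_ATTENTION=1 would opt into the still-crashing Flash Attention path.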
src/ccommon/image_io_png.c: 2 changes (1 addition, 1 deletion)
@@ -293,8 +293,8 @@ int imgio_png_save_op(CodecPng* S, ImageIO* imgio, Image* img)
png_write_png(png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);

end:
alloc_free(IMAGE_IO_ALLOCATOR, texts);
alloc_free(IMAGE_IO_ALLOCATOR, row_pointers);
alloc_free(IMAGE_IO_ALLOCATOR, texts);
if (png_ptr)
png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : NULL);
return R;
src/clip.c: 16 changes (6 additions, 10 deletions)
@@ -10,6 +10,12 @@

#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_gelu_inplace ggml_gelu
#define ggml_gelu_quick_inplace ggml_gelu_quick
#endif

const ClipParams g_clip_vit_l_14 = {
.n_vocab = 49408,
.n_token = 77,
@@ -255,19 +261,9 @@ MLTensor* mlb_clip_mlp(MLCtx* C, MLTensor* x,

x = MLN("fc1", mlb_nn_linear(C, x, n_interm, true));
if (d_model == 1024 || d_model == 1280) { //SD2 or SDXL
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
x = ggml_gelu(C->cc, x);
#else
x = ggml_gelu_inplace(C->cc, x);
#endif
} else { //SD1
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
x = ggml_gelu_quick(C->cc, x);
#else
x = ggml_gelu_quick_inplace(C->cc, x);
#endif
}
x = MLN("fc2", mlb_nn_linear(C, x, d_model, true));
return x;
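
With the file-level aliases above, the activation call sites in mlb_clip_mlp no longer need per-site #if blocks. A minimal sketch of what the body reduces to after this commit, reconstructed from the hunk above (MLN, mlb_nn_linear, and C->cc are as defined in the surrounding file):

    x = MLN("fc1", mlb_nn_linear(C, x, n_interm, true));
    if (d_model == 1024 || d_model == 1280) { // SD2 or SDXL
        x = ggml_gelu_inplace(C->cc, x);       // plain ggml_gelu when USE_GGML_SCHED is set
    } else { // SD1
        x = ggml_gelu_quick_inplace(C->cc, x); // plain ggml_gelu_quick when USE_GGML_SCHED is set
    }
    x = MLN("fc2", mlb_nn_linear(C, x, d_model, true));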
src/mlblock.c: 6 changes (5 additions, 1 deletion)
@@ -170,6 +170,10 @@ int mlctx_alloc(MLCtx* C)
if (!ggml_backend_sched_alloc_graph(C->sched, C->graph))
ERROR_LOG(-1, "ggml_backend_sched_alloc_graph");

mllog_debug("ggml sched splits:%d copies:%d",
ggml_backend_sched_get_n_splits(C->sched),
ggml_backend_sched_get_n_copies(C->sched) );

C->info.mem_compute = 0;
for (int i=0; i<n_bk; ++i) {
size_t s = ggml_backend_sched_get_buffer_size(C->sched, bk_list[i]);
@@ -180,7 +184,7 @@ int mlctx_alloc(MLCtx* C)
C->info.mem_total = C->info.mem_params + C->info.mem_compute;
#endif

mllog_info("%s memory use: %.1fMiB (params), %.1fMiB (compute)",
mllog_info("%s memory usage: %.1fMiB (params), %.1fMiB (compute)",
C->c.name, C->info.mem_params * F_MIB, C->info.mem_compute * F_MIB);

end:
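
The added mllog_debug call reports how many sub-graphs (splits) the scheduler created and how many inter-backend tensor copies it inserted; ggml_backend_sched_get_n_splits and ggml_backend_sched_get_n_copies are existing ggml scheduler queries. For a Vulkan build this is a quick sanity check: a low split count suggests most operations stayed on the GPU backend, while a high count points to frequent CPU fallbacks (this interpretation is an inference, not stated in the commit).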
src/mlblock_nn.c: 12 changes (7 additions, 5 deletions)
@@ -6,6 +6,12 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_gelu_inplace ggml_gelu
#define ggml_silu_inplace ggml_silu
#endif

//ref: pytorch.nn.Linear
MLTensor* mlb_nn_linear(MLCtx* C, MLTensor* x, int n_out, bool bias)
{
@@ -157,12 +163,8 @@ MLTensor* mlb_GEGLU(MLCtx* C, MLTensor* x, int d_out)
x = MLN("proj", mlb_nn_linear(C, x, d_out*2, true));
// [ne3, ne2, ne1, d_out*2]
ggml_chunk(C->cc, x, 2, 0, &x, &g);
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
g = ggml_gelu(C->cc, g);
#else
g = ggml_cont(C->cc, g);
g = ggml_gelu_inplace(C->cc, g);
#endif
x = ggml_mul(C->cc, x, g);
// [ne3, ne2, ne1, d_out]
return x;
src/mlimgsynth.c: 23 changes (12 additions, 11 deletions)
@@ -140,6 +140,8 @@ int mlis_args_load(MLImgSynthApp* S, int argc, char* argv[])
return 1;
}

//TODO: validate input ranges

int i, j;
for (i=1; i<argc; ++i) {
char * arg = argv[i];
@@ -230,12 +232,12 @@ int mlis_args_load(MLImgSynthApp* S, int argc, char* argv[])
return 1;
}
}

// Save the path of the directory where the binary is located.
// May be used later to look for related files.
if (argv[0] && argv[0][0]) {
const char *tail = path_tail(argv[0]); // "dir/file" -> "file"
if (tail+1 > argv[0]) {
if (tail > argv[0]+1) {
dstr_copy(S->path_bin, tail - argv[0] - 1, argv[0]);
log_debug("bin path: %s", S->path_bin);
assert( file_exists(S->path_bin) );
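
The changed comparison fixes an off-by-one. path_tail presumably returns a pointer to the file-name part (per the "dir/file" -> "file" comment), so when argv[0] has no directory component (tail == argv[0]) the old test tail+1 > argv[0] still passed and dstr_copy received a negative length (tail - argv[0] - 1 == -1). The new test tail > argv[0]+1 requires at least one directory character before the separator; for example, with argv[0] == "./mlimgsynth", tail points just past the '/' and path_bin becomes ".".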
@@ -531,12 +533,12 @@ int mlis_ml_init(MLImgSynthApp* S)
{
int R=1;
assert(!S->ctx.backend);

S->ctx.c.wtype = GGML_TYPE_F16;
S->ctx.tstore = &S->tstore;

// Backend init
if (S->c.backend)
if (S->c.backend && S->c.backend[0])
S->ctx.backend = ggml_backend_reg_init_backend_from_str(S->c.backend);
else
S->ctx.backend = ggml_backend_cpu_init();
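
The added S->c.backend[0] check treats an empty backend string like an unset one, so the code falls back to ggml_backend_cpu_init() instead of handing an empty name to ggml_backend_reg_init_backend_from_str.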
@@ -928,6 +930,7 @@ int mlis_generate(MLImgSynthApp* S)
int R=1;
UnetState ctx={0};
Image img={0};
DynStr infotxt=NULL;
LocalTensor latent={0}, noise={0},
cond={0}, label={0},
uncond={0}, unlabel={0};
@@ -992,7 +995,7 @@
if (S->unet_p->uncond_empty_zero && !(S->c.nprompt && S->c.nprompt[0]))
ltensor_for(uncond,i,0) uncond.d[i] = 0;
}
else if (S->c.nprompt)
else if (S->c.nprompt && S->c.nprompt[0])
log_warning("negative prompt provided but CFG is not enabled");

debug_ltensor_stats(&uncond, "uncond");
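
The extra S->c.nprompt[0] check means an empty negative-prompt string no longer triggers the "CFG is not enabled" warning; only a non-empty negative prompt without CFG is flagged.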
@@ -1095,9 +1098,8 @@
}

// Save latent
const char *path_out = S->c.path_out;
IFFALSESET(path_out, "latent-out.tensor");
ltensor_save_path(&latent, path_out);
const char *path_latent = "latent-out.tensor"; //TODO: option
ltensor_save_path(&latent, path_latent);

mlctx_free(&S->ctx); //free memory

@@ -1107,7 +1109,6 @@

// Make info text
// Imitates stable-diffusion-webui create_infotext
DynStr infotxt=NULL;
if (S->c.prompt)
dstr_printfa(infotxt, "%s\n", S->c.prompt);
//TODO: input latent or image filename?
Expand Down Expand Up @@ -1137,9 +1138,9 @@ int mlis_generate(MLImgSynthApp* S)
IFFALSESET(S->c.path_out, "output.png");
log_debug("Writing image to '%s'", S->c.path_out);
TRY( img_save_file_info(&img, S->c.path_out, "parameters", infotxt) );
dstr_free(infotxt);

end:
dstr_free(infotxt);
img_free(&img);
ltensor_free(&noise);
ltensor_free(&uncond);
@@ -1150,7 +1151,7 @@ int mlis_generate(MLImgSynthApp* S)
}
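
Declaring DynStr infotxt at the top of mlis_generate and freeing it in the end: block means the info text is released on every exit path; previously, a failure inside img_save_file_info would jump to end before reaching the dstr_free(infotxt) that followed it, leaking the string.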

/* Checks all the operations with deterministic inputs and prints the
* resulting tensor sums. Useful to easily check if any broke down
* resulting tensor sums. Useful to easily check if anything broke down
* during development. The tests are independent of each other.
*/
int mlis_check(MLImgSynthApp* S)
src/tae.c: 6 changes (6 additions, 0 deletions)
@@ -8,6 +8,12 @@
#define F 0 //false
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_relu_inplace ggml_relu
#define ggml_tanh_inplace ggml_tanh
#endif

const SdTaeParams g_sdtae_sd1 = {
.ch_x = 3,
.ch_inner = 64,
src/unet.c: 5 changes (5 additions, 0 deletions)
@@ -10,6 +10,11 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_silu_inplace ggml_silu
#endif

float g_log_sigmas_sd[1000];

const UnetParams g_unet_sd1 = {
src/util.c: 3 changes (2 additions, 1 deletion)
@@ -31,7 +31,8 @@ int img_save_file_info(const Image* img, const char* path,
log_warning("Could not write '%s' in '%s'", info_key, path);
}

TRY( imgio_save(&imgio, img) );
TRY_LOG( imgio_save(&imgio, img),
"Could not save image in '%s'", path );

end:
dstr_free(tmps);
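
Switching from TRY to TRY_LOG makes a failed image save report the offending path instead of failing silently. The project's actual TRY_LOG definition is not part of this diff; a hypothetical sketch, assuming the same R/goto end error convention visible in the surrounding code, might look like:

    // Hypothetical illustration only -- not the project's real macro.
    #define TRY_LOG(EXPR, ...) do { \
        if ((R = (EXPR)) < 0) { \
            log_warning(__VA_ARGS__); /* or the project's error-level logger */ \
            goto end; \
        } \
    } while (0)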
src/vae.c: 5 changes (5 additions, 0 deletions)
@@ -13,6 +13,11 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_silu_inplace ggml_silu
#endif

const VaeParams g_vae_sd1 = {
.ch_x = 3,
.ch_z = 4,
