Fixes for the w64-vulkan release
aagarcia committed Jul 13, 2024
1 parent c7baba6 commit 07d0180
Showing 10 changed files with 59 additions and 31 deletions.
Makefile: 12 changes (10 additions, 2 deletions)
@@ -24,6 +24,16 @@ cflags += -Wno-pedantic
ldlibs += -lggml
ldflags += -L$(GGML_LIB_PATH) -Wl,-rpath,$(GGML_LIB_PATH)

## ggml scheduler is needed for incomplete backends (e.g. Vulkan)
ifndef MLIS_NO_GGML_SCHED
mlimgsynth: cppflags += -DUSE_GGML_SCHED=1
endif

## Flash Attention (not working yet, crashes)
ifdef MLIS_FLASH_ATTENTION
mlimgsynth: cppflags += -DUSE_FLASH_ATTENTION
endif

# png
ifndef MLIS_NO_PNG
mlimgsynth: ldlibs += -lpng
@@ -43,8 +53,6 @@ rng-test: $(common) rng_philox.o rng-test.o

st-util: $(common) ids.o tensorstore.o safetensors.o st-util.o

#mlimgsynth: cppflags += -DUSE_FLASH_ATTENTION
mlimgsynth: cppflags += -DUSE_GGML_SCHED=1
mlimgsynth: $(common) ids.o localtensor.o tensorstore.o safetensors.o \
ggml_extend.o mlblock.o mlblock_nn.o rng_philox.o tae.o vae.o clip.o unet.o \
solvers.o util.o mlimgsynth.o
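
Usage note (not part of the commit, inferred from the conditionals above): the scheduler define is now enabled by default and would be disabled with something like make MLIS_NO_GGML_SCHED=1, while make MLIS_FLASH_ATTENTION=1 would opt into the still-crashing Flash Attention path.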
src/ccommon/image_io_png.c: 2 changes (1 addition, 1 deletion)
@@ -293,8 +293,8 @@ int imgio_png_save_op(CodecPng* S, ImageIO* imgio, Image* img)
png_write_png(png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);

end:
alloc_free(IMAGE_IO_ALLOCATOR, texts);
alloc_free(IMAGE_IO_ALLOCATOR, row_pointers);
alloc_free(IMAGE_IO_ALLOCATOR, texts);
if (png_ptr)
png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : NULL);
return R;
src/clip.c: 16 changes (6 additions, 10 deletions)
@@ -10,6 +10,12 @@

#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_gelu_inplace ggml_gelu
#define ggml_gelu_quick_inplace ggml_gelu_quick
#endif

const ClipParams g_clip_vit_l_14 = {
.n_vocab = 49408,
.n_token = 77,
@@ -255,19 +261,9 @@ MLTensor* mlb_clip_mlp(MLCtx* C, MLTensor* x,

x = MLN("fc1", mlb_nn_linear(C, x, n_interm, true));
if (d_model == 1024 || d_model == 1280) { //SD2 or SDXL
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
x = ggml_gelu(C->cc, x);
#else
x = ggml_gelu_inplace(C->cc, x);
#endif
} else { //SD1
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
x = ggml_gelu_quick(C->cc, x);
#else
x = ggml_gelu_quick_inplace(C->cc, x);
#endif
}
x = MLN("fc2", mlb_nn_linear(C, x, d_model, true));
return x;
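
With the file-level aliases above, the activation call sites in mlb_clip_mlp no longer need per-site #if blocks. A minimal sketch of what the body reduces to after this commit, reconstructed from the hunk above (MLN, mlb_nn_linear, and C->cc are as defined in the surrounding file):

    x = MLN("fc1", mlb_nn_linear(C, x, n_interm, true));
    if (d_model == 1024 || d_model == 1280) { // SD2 or SDXL
        x = ggml_gelu_inplace(C->cc, x);       // plain ggml_gelu when USE_GGML_SCHED is set
    } else { // SD1
        x = ggml_gelu_quick_inplace(C->cc, x); // plain ggml_gelu_quick when USE_GGML_SCHED is set
    }
    x = MLN("fc2", mlb_nn_linear(C, x, d_model, true));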
src/mlblock.c: 6 changes (5 additions, 1 deletion)
@@ -170,6 +170,10 @@ int mlctx_alloc(MLCtx* C)
if (!ggml_backend_sched_alloc_graph(C->sched, C->graph))
ERROR_LOG(-1, "ggml_backend_sched_alloc_graph");

mllog_debug("ggml sched splits:%d copies:%d",
ggml_backend_sched_get_n_splits(C->sched),
ggml_backend_sched_get_n_copies(C->sched) );

C->info.mem_compute = 0;
for (int i=0; i<n_bk; ++i) {
size_t s = ggml_backend_sched_get_buffer_size(C->sched, bk_list[i]);
@@ -180,7 +184,7 @@ int mlctx_alloc(MLCtx* C)
C->info.mem_total = C->info.mem_params + C->info.mem_compute;
#endif

mllog_info("%s memory use: %.1fMiB (params), %.1fMiB (compute)",
mllog_info("%s memory usage: %.1fMiB (params), %.1fMiB (compute)",
C->c.name, C->info.mem_params * F_MIB, C->info.mem_compute * F_MIB);

end:
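
The added mllog_debug call reports how many sub-graphs (splits) the scheduler created and how many inter-backend tensor copies it inserted; ggml_backend_sched_get_n_splits and ggml_backend_sched_get_n_copies are existing ggml scheduler queries. For a Vulkan build this is a quick sanity check: a low split count suggests most operations stayed on the GPU backend, while a high count points to frequent CPU fallbacks (this interpretation is an inference, not stated in the commit).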
src/mlblock_nn.c: 12 changes (7 additions, 5 deletions)
@@ -6,6 +6,12 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_gelu_inplace ggml_gelu
#define ggml_silu_inplace ggml_silu
#endif

//ref: pytorch.nn.Linear
MLTensor* mlb_nn_linear(MLCtx* C, MLTensor* x, int n_out, bool bias)
{
@@ -157,12 +163,8 @@ MLTensor* mlb_GEGLU(MLCtx* C, MLTensor* x, int d_out)
x = MLN("proj", mlb_nn_linear(C, x, d_out*2, true));
// [ne3, ne2, ne1, d_out*2]
ggml_chunk(C->cc, x, 2, 0, &x, &g);
// The GGML scheduler have problems with inplace operations
#if USE_GGML_SCHED
g = ggml_gelu(C->cc, g);
#else
g = ggml_cont(C->cc, g);
g = ggml_gelu_inplace(C->cc, g);
#endif
x = ggml_mul(C->cc, x, g);
// [ne3, ne2, ne1, d_out]
return x;
src/mlimgsynth.c: 23 changes (12 additions, 11 deletions)
@@ -140,6 +140,8 @@ int mlis_args_load(MLImgSynthApp* S, int argc, char* argv[])
return 1;
}

//TODO: validate input ranges

int i, j;
for (i=1; i<argc; ++i) {
char * arg = argv[i];
@@ -230,12 +232,12 @@ int mlis_args_load(MLImgSynthApp* S, int argc, char* argv[])
return 1;
}
}

// Save the path of the directory where the binary is located.
// May be used later to look for related files.
if (argv[0] && argv[0][0]) {
const char *tail = path_tail(argv[0]); // "dir/file" -> "file"
if (tail+1 > argv[0]) {
if (tail > argv[0]+1) {
dstr_copy(S->path_bin, tail - argv[0] - 1, argv[0]);
log_debug("bin path: %s", S->path_bin);
assert( file_exists(S->path_bin) );
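
The changed comparison fixes an off-by-one. path_tail presumably returns a pointer to the file-name part (per the "dir/file" -> "file" comment), so when argv[0] has no directory component (tail == argv[0]) the old test tail+1 > argv[0] still passed and dstr_copy received a negative length (tail - argv[0] - 1 == -1). The new test tail > argv[0]+1 requires at least one directory character before the separator; for example, with argv[0] == "./mlimgsynth", tail points just past the '/' and path_bin becomes ".".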
@@ -531,12 +533,12 @@ int mlis_ml_init(MLImgSynthApp* S)
{
int R=1;
assert(!S->ctx.backend);

S->ctx.c.wtype = GGML_TYPE_F16;
S->ctx.tstore = &S->tstore;

// Backend init
if (S->c.backend)
if (S->c.backend && S->c.backend[0])
S->ctx.backend = ggml_backend_reg_init_backend_from_str(S->c.backend);
else
S->ctx.backend = ggml_backend_cpu_init();
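
The added S->c.backend[0] check treats an empty backend string like an unset one, so the code falls back to ggml_backend_cpu_init() instead of handing an empty name to ggml_backend_reg_init_backend_from_str.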
@@ -928,6 +930,7 @@ int mlis_generate(MLImgSynthApp* S)
int R=1;
UnetState ctx={0};
Image img={0};
DynStr infotxt=NULL;
LocalTensor latent={0}, noise={0},
cond={0}, label={0},
uncond={0}, unlabel={0};
@@ -992,7 +995,7 @@
if (S->unet_p->uncond_empty_zero && !(S->c.nprompt && S->c.nprompt[0]))
ltensor_for(uncond,i,0) uncond.d[i] = 0;
}
else if (S->c.nprompt)
else if (S->c.nprompt && S->c.nprompt[0])
log_warning("negative prompt provided but CFG is not enabled");

debug_ltensor_stats(&uncond, "uncond");
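
The extra S->c.nprompt[0] check means an empty negative-prompt string no longer triggers the "CFG is not enabled" warning; only a non-empty negative prompt without CFG is flagged.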
@@ -1095,9 +1098,8 @@
}

// Save latent
const char *path_out = S->c.path_out;
IFFALSESET(path_out, "latent-out.tensor");
ltensor_save_path(&latent, path_out);
const char *path_latent = "latent-out.tensor"; //TODO: option
ltensor_save_path(&latent, path_latent);

mlctx_free(&S->ctx); //free memory

@@ -1107,7 +1109,6 @@

// Make info text
// Imitates stable-diffusion-webui create_infotext
DynStr infotxt=NULL;
if (S->c.prompt)
dstr_printfa(infotxt, "%s\n", S->c.prompt);
//TODO: input latent or image filename?
Expand Down Expand Up @@ -1137,9 +1138,9 @@ int mlis_generate(MLImgSynthApp* S)
IFFALSESET(S->c.path_out, "output.png");
log_debug("Writing image to '%s'", S->c.path_out);
TRY( img_save_file_info(&img, S->c.path_out, "parameters", infotxt) );
dstr_free(infotxt);

end:
dstr_free(infotxt);
img_free(&img);
ltensor_free(&noise);
ltensor_free(&uncond);
@@ -1150,7 +1151,7 @@ int mlis_generate(MLImgSynthApp* S)
}
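
Declaring DynStr infotxt at the top of mlis_generate and freeing it in the end: block means the info text is released on every exit path; previously, a failure inside img_save_file_info would jump to end before reaching the dstr_free(infotxt) that followed it, leaking the string.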

/* Checks all the operations with deterministic inputs and prints the
* resulting tensor sums. Useful to easily check if any broke down
* resulting tensor sums. Useful to easily check if anything broke down
* during development. The tests are independent of each other.
*/
int mlis_check(MLImgSynthApp* S)
src/tae.c: 6 changes (6 additions, 0 deletions)
@@ -8,6 +8,12 @@
#define F 0 //false
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_relu_inplace ggml_relu
#define ggml_tanh_inplace ggml_tanh
#endif

const SdTaeParams g_sdtae_sd1 = {
.ch_x = 3,
.ch_inner = 64,
src/unet.c: 5 changes (5 additions, 0 deletions)
@@ -10,6 +10,11 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_silu_inplace ggml_silu
#endif

float g_log_sigmas_sd[1000];

const UnetParams g_unet_sd1 = {
src/util.c: 3 changes (2 additions, 1 deletion)
@@ -31,7 +31,8 @@ int img_save_file_info(const Image* img, const char* path,
log_warning("Could not write '%s' in '%s'", info_key, path);
}

TRY( imgio_save(&imgio, img) );
TRY_LOG( imgio_save(&imgio, img),
"Could not save image in '%s'", path );

end:
dstr_free(tmps);
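
Switching from TRY to TRY_LOG makes a failed image save report the offending path instead of failing silently. The project's actual TRY_LOG definition is not part of this diff; a hypothetical sketch, assuming the same R/goto end error convention visible in the surrounding code, might look like:

    // Hypothetical illustration only -- not the project's real macro.
    #define TRY_LOG(EXPR, ...) do { \
        if ((R = (EXPR)) < 0) { \
            log_warning(__VA_ARGS__); /* or the project's error-level logger */ \
            goto end; \
        } \
    } while (0)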
src/vae.c: 5 changes (5 additions, 0 deletions)
@@ -13,6 +13,11 @@
#define T true
#define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with inplace operations (2024-07-13)
#if USE_GGML_SCHED
#define ggml_silu_inplace ggml_silu
#endif

const VaeParams g_vae_sd1 = {
.ch_x = 3,
.ch_z = 4,
