From ffbff078b51aba1c1e3bd920358fb0afecd3b921 Mon Sep 17 00:00:00 2001 From: CarlGao4 Date: Fri, 10 May 2024 01:38:25 +0800 Subject: [PATCH] ipex 2.1.30 --- GUI/GuiMain.py | 20 +++++++-- GUI/find_device_win.py | 96 +++++++++++++++++++++++++++++++++++++++++- GUI/separator.py | 19 +++++++-- GUI/shared.py | 25 +++++++++++ MKL-AOT.md | 29 ++++++++++--- 5 files changed, 175 insertions(+), 14 deletions(-) diff --git a/GUI/GuiMain.py b/GUI/GuiMain.py index 50deacf..f6c88e5 100644 --- a/GUI/GuiMain.py +++ b/GUI/GuiMain.py @@ -1,4 +1,4 @@ -__version__ = "1.2a1" +__version__ = "1.2b1" LICENSE = f"""Demucs-GUI {__version__} Copyright (C) 2022-2024 Carl Gao, Jize Guo, Rosario S.E. @@ -124,6 +124,7 @@ from PySide6_modified import ( Action, DelegateCombiner, + DoNothingDelegate, ExpandingQPlainTextEdit, FileNameDelegate, PercentSpinBoxDelegate, @@ -1072,6 +1073,8 @@ def save(self, file: pathlib.Path, origin, tensor, save_func, item, finishCallba finishCallback(shared.FileStatus.Writing, item) shared.AddHistory("save_location", value=self.loc_input.currentText()) for stem, stem_data in main_window.mixer.mix(origin, tensor): + if separator.np.isnan(stem_data).any() or separator.np.isinf(stem_data).any(): + logging.warning("NaN or inf found in stem %s" % stem) match self.encoder_group.checkedId(): case 0: file_ext = self.file_format.currentText() @@ -1437,6 +1440,12 @@ def resume(self): shared.FileStatus.Cancelled, shared.FileStatus.Failed, ]: + if self.table.item(i, 1).data(Qt.ItemDataRole.UserRole)[0] in [ + shared.FileStatus.Cancelled, + shared.FileStatus.Failed, + ]: + self.queue_length += 1 + main_window.updateQueueLength() self.table.item(i, 1).setData(Qt.ItemDataRole.UserRole, [shared.FileStatus.Queued]) self.table.item(i, 1).setData(ProgressDelegate.TextRole, "Queued") @@ -1541,6 +1550,9 @@ def __init__(self): PercentSpinBoxDelegate(minimum=-500, maximum=500, step=1), lambda x: x.column() > 1 and x.row() >= len(main_window.separator.sources) * 3, ) + self.delegate.addDelegate( + DoNothingDelegate(), lambda x: x.column() >= 1 and x.row() < len(main_window.separator.sources) * 3 + ) self.outputs_table.setItemDelegate(self.delegate) default_preset = shared.GetHistory("default_preset", self.preset_stem_key, default=None, autoset=False) @@ -1751,9 +1763,9 @@ def applyPreset(self): for stem, sources, enabled in preset[1]: # Calculate weights first # The order of sources is not guaranteed, so we need to use the index of the source - weights = [stem, sources["origin"]] + weights = [stem, f"{sources['origin']}%\u3000"] for source in main_window.separator.sources: - weights.append(sources[source]) + weights.append(f"{sources[source]}%\u3000") self.outputs_table.addRow(weights, enabled) def duplicateSelected(self): @@ -1949,7 +1961,7 @@ def startSeparation(self): self.start_button.setEnabled(True) main_window.save_options.encoder_ffmpeg_box.setEnabled(True) main_window.setStatusText.emit("No more file to separate") - separator.empty_cuda_cache() + separator.empty_cache() return file = main_window.file_queue.table.item(index, 0).data(Qt.ItemDataRole.UserRole) item = main_window.file_queue.table.item(index, 1) diff --git a/GUI/find_device_win.py b/GUI/find_device_win.py index 6467c37..26fe4b0 100644 --- a/GUI/find_device_win.py +++ b/GUI/find_device_win.py @@ -77,7 +77,77 @@ "12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"}, "12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"}, "12.59.0": {"4F89", "56A9", "56AA"}, - } + }, + "2.1.30": { + "12.0.0": {"9A40", "9A49", "9A59", 
"9A60", "9A68", "9A70", "9A78", "FF20"}, + "12.1.0": {"4C80", "4C8A", "4C8B", "4C8C", "4C90", "4C9A"}, + "12.2.0": { + "4680", + "4682", + "4688", + "468A", + "4690", + "4692", + "4693", + "A780", + "A781", + "A782", + "A783", + "A788", + "A789", + "A78B", + }, + "12.3.0": { + "4626", + "4628", + "462A", + "46A0", + "46A1", + "46A2", + "46A3", + "46A6", + "46A8", + "46AA", + "46B0", + "46B1", + "46B2", + "46B3", + "46C0", + "46C1", + "46C2", + "46C3", + "A720", + "A721", + "A7A0", + "A7A1", + "A7A8", + "A7A9", + }, + "12.4.0": {"46D0", "46D1", "46D2"}, + "12.10.0": {"4905", "4906", "4907", "4908"}, + "12.55.8": {"4F80", "4F81", "4F82", "4F83", "4F84", "5690", "5691", "5692", "56A0", "56A1", "56A2", "56C0"}, + "12.56.5": { + "4F87", + "4F88", + "5693", + "5694", + "5695", + "56A5", + "56A6", + "56B0", + "56B1", + "56BA", + "56BB", + "56BC", + "56BD", + "56C1", + }, + "12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"}, + "12.59.0": {"4F89", "56A9", "56AA"}, + "12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"}, + "12.70.4": {"7D40", "7D45", "7D60", "7D67"}, + "12.71.4": {"7D55", "7DD5"}, + }, } AOT_link_fmt = "https://www.fosshub.com/Demucs-GUI-old.html?dwl={file}" @@ -99,7 +169,29 @@ "12.57.0": "12.57.0_acm-g12_dg2-g12_dg2-g12-a0.7z", "12.58.0": "12.58.0_acm-g20_dg2-g20.7z", "12.59.0": "12.59.0_acm-g21_dg2-g21.7z", - } + }, + "2.1.30": { + "12.0.0": "12.0.0_2.1.30.7z", + "12.1.0": "12.1.0_2.1.30.7z", + "12.2.0": "12.2.0_2.1.30.7z", + "12.3.0": "12.3.0_2.1.30.7z", + "12.4.0": "12.4.0_2.1.30.7z", + "12.10.0": "12.10.0_2.1.30.7z", + "12.55.0": "12.55.0_2.1.30.7z", + "12.55.1": "12.55.1_2.1.30.7z", + "12.55.4": "12.55.4_2.1.30.7z", + "12.55.8": "12.55.8_2.1.30.7z", + "12.56.0": "12.56.0_2.1.30.7z", + "12.56.4": "12.56.4_2.1.30.7z", + "12.56.5": "12.56.5_2.1.30.7z", + "12.57.0": "12.57.0_2.1.30.7z", + "12.58.0": "12.58.0_2.1.30.7z", + "12.59.0": "12.59.0_2.1.30.7z", + "12.70.0": "12.70.0_2.1.30.7z", + "12.70.4": "12.70.4_2.1.30.7z", + "12.71.0": "12.71.0_2.1.30.7z", + "12.71.4": "12.71.4_2.1.30.7z", + }, } gpus = [] diff --git a/GUI/separator.py b/GUI/separator.py index 8d6ab68..d9f14b7 100644 --- a/GUI/separator.py +++ b/GUI/separator.py @@ -37,6 +37,7 @@ default_device = 0 used_cuda = False +used_xpu = False has_Intel = False Intel_JIT_only = False @@ -50,8 +51,9 @@ class ModelSourceNameUnsupportedError(Exception): @shared.thread_wrapper(daemon=True) def starter(update_status: tp.Callable[[str], None], finish: tp.Callable[[float], None]): - global torch, demucs, audio, has_Intel, Intel_JIT_only + global torch, demucs, audio, has_Intel, Intel_JIT_only, np import torch + import numpy as np for i in range(5): try: @@ -237,10 +239,13 @@ def autoListModels(): return models, infos, each_repos -def empty_cuda_cache(): +def empty_cache(): if used_cuda: for _ in range(10): torch.cuda.empty_cache() + if used_xpu: + for _ in range(10): + torch.xpu.empty_cache() class Separator: @@ -454,13 +459,16 @@ def separate( logging.info("Start separating audio: %s" % file.name) logging.info("Parameters: segment=%.2f overlap=%.2f shifts=%d" % (segment, overlap, shifts)) logging.info("Device: %s" % device) - global used_cuda + global used_cuda, used_xpu if device.startswith("cuda"): used_cuda = True + if device.startswith("xpu"): + used_xpu = True try: setStatus(shared.FileStatus.Reading, item) wav = audio.read_audio(file, self.separator.model.samplerate, self.updateStatus) assert wav is not None + assert (np.isnan(wav).sum() == 0) and (np.isinf(wav).sum() == 0), "Audio contains NaN 
or Inf" except Exception: finishCallback(shared.FileStatus.Failed, item) self.separating = False @@ -476,12 +484,15 @@ def separate( self.time_hists = [] self.last_update_eta = 0 + self.separator.model.to("cpu") # To avoid moving between different GPUs which may cause error + try: self.updateStatus("Separating audio: %s" % file.name) self.separator.update_parameter( device=device, segment=segment, shifts=shifts, overlap=overlap, callback=self.updateProgress ) wav_torch = torch.from_numpy(wav).clone().transpose(0, 1) + assert (not wav_torch.isnan().any()) and (not wav_torch.isinf().any()), "Audio contains NaN or Inf" src_channels = wav_torch.shape[0] logging.info("Running separation...") self.time_hists.append((time.time(), 0)) @@ -508,6 +519,8 @@ def separate( finishCallback(shared.FileStatus.Failed, item) self.separating = False return + finally: + self.separator.model.to("cpu") logging.info("Saving separated audio...") save_callback(file, wav_torch, out, self.save_callback, item, finishCallback) self.separating = False diff --git a/GUI/shared.py b/GUI/shared.py index 439af73..01168ec 100644 --- a/GUI/shared.py +++ b/GUI/shared.py @@ -85,6 +85,31 @@ historyLock = threading.Lock() +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS") and sys.platform == "win32": + # Popen should be wrapped to avoid WinError 50 + subprocess._Popen = subprocess.Popen + + def wrapped_Popen(*args, **kwargs): + if "stdout" in kwargs and kwargs["stdout"] is not None: + if "stderr" not in kwargs or kwargs["stderr"] is None: + kwargs["stderr"] = subprocess.PIPE + if "stdin" not in kwargs or kwargs["stdin"] is None: + kwargs["stdin"] = subprocess.PIPE + if "stderr" in kwargs and kwargs["stderr"] is not None: + if "stdout" not in kwargs or kwargs["stdout"] is None: + kwargs["stdout"] = subprocess.PIPE + if "stdin" not in kwargs or kwargs["stdin"] is None: + kwargs["stdin"] = subprocess.PIPE + if "stdin" in kwargs and kwargs["stdin"] is not None: + if "stdout" not in kwargs or kwargs["stdout"] is None: + kwargs["stdout"] = subprocess.PIPE + if "stderr" not in kwargs or kwargs["stderr"] is None: + kwargs["stderr"] = subprocess.PIPE + return subprocess._Popen(*args, **kwargs) + + subprocess.Popen = wrapped_Popen + + def HSize(size): s = size t = 0 diff --git a/MKL-AOT.md b/MKL-AOT.md index ef9e7c4..61d80d8 100644 --- a/MKL-AOT.md +++ b/MKL-AOT.md @@ -1,14 +1,17 @@ # Notes for users using MKL release (Intel GPU) -**THIS DOCUMENT IS OUT OF DATE FOR DEMUCS-GUI >= 1.2b1** - ## Why the separation process is so slow? The official package of IPEX (Intel Extension for Pytorch) is not built with AOT (Ahead-Of-Time), with only JIT (Just-In-Time) support (Demucs-GUI release is also packed with this package). This means that the first separation operation each time you start Demucs-GUI will take a long time (normally more than 5 minutes) to compile the model if you use Intel GPU. Please note that if you restart Demucs-GUI, the model will be recompiled again. JIT may also fails sometimes. This is when you need to restart Demucs-GUI. -This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (actually, 16) will make the package too large (20GB+). But I've built the AOT binaries separately for each architecture and uploaded them to FossHUB. The binaries are built for `Windows x86_64`, `Python 3.11`, `torch 2.1.0a0+git7bcf7da` (patched by Intel), `intel_extension_for_pytorch 2.1.10+git45400a8`. 
+This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (16 in total) would make the package too large (20GB+). Instead, I've built the AOT binaries separately for each architecture and uploaded them to FossHUB.
+
+I've built two different versions of `intel_extension_for_pytorch`. All binaries are built for `Windows x86_64` and `Python 3.11`. Demucs-GUI 1.1a2 to 1.2a1 are packed with `torch 2.1.0a0+git7bcf7da` (patched by Intel) and `intel_extension_for_pytorch 2.1.10+git45400a8`, while from 1.2b1 it will be packed with `torch 2.1.0` and `intel_extension_for_pytorch 2.1.30+xpu`. You can also install `2.1.30+xpu` from [my own redistribution GitHub repo](https://github.com/CarlGao4/ipex-wheel/releases). The support list for each version is shown below. Please note that the support list depends on the version of `intel_extension_for_pytorch`, not on the version of Demucs-GUI; the Demucs-GUI versions above only indicate which `intel_extension_for_pytorch` version each release is packed with. If you are running from source code, you can use any version of `intel_extension_for_pytorch` as long as it is compatible with your GPU.
+
+The tables are generated by running `ocloc.exe` with each device ID from 0x0000 to 0xFFFF. Theoretically, all these GPUs should be supported (even if they are not released yet), and even some unlisted GPUs may work.

-Following GPUs are supported (for details, please see [find_device_win.py](GUI/find_device_win.py)):
+### 2.1.10+xpu (Demucs-GUI 1.1a2 to 1.2a1)
+Following GPUs are supported with `2.1.10+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)):
| PCI ID (Only the device part) | Architecture | Generation Code | Display Name |
| ----------------------------- | ------------ | --------------- | ------------ |
| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_tgl_tgllp.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
@@ -23,7 +26,23 @@ Following GPUs are supported (for details, please see [find_device_win.py](GUI/f
| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_acm-g20_dg2-g20.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |
| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | [`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_acm-g21_dg2-g21.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |

-The table above is generated by running `ocloc.exe` with argument device from 0x0000 to 0xFFFF. Theroetically, all these GPUs should be supported (even if they are not released yet) and even some unlisted GPUs can be supported.
+### 2.1.30+xpu (Demucs-GUI 1.2b1 and later) +Following GPUs are supported with `2.1.30+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)): +| PCI ID (Only the device part) | Architecture | Generation Code | Display Name | +| ----------------------------- | ------------ | --------------- | ------------ | +| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` | +| `4C80` `4C8A` `4C8B` `4C8C` `4C90` `4C9A` | Rocket Lake (`rkl`) | [`12.1.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.1.0_2.1.30.7z) | `Intel® UHD Graphics` | +| `4680` `4682` `4688` `468A` `4690` `4692` `4693` `A780` `A781` `A782` `A783` `A788` `A789` `A78B` | Alder Lake-S, Raptor Lake-S (`adl-s`) | [`12.2.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.2.0_2.1.30.7z) | `Intel® UHD Graphics` | +| `4626` `4628` `462A` `46A0` `46A1` `46A2` `46A3` `46A6` `46A8` `46AA` `46B0` `46B1` `46B2` `46B3` `46C0` `46C1` `46C2` `46C3` `A720` `A721` `A7A0` `A7A1` `A7A8` `A7A9` | Alder Lake, Raptor Lake-P (`adl-p`) | [`12.3.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.3.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` | +| `46D0` `46D1` `46D2` | Alder Lake-N (`adl-n`) | [`12.4.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.4.0_2.1.30.7z) | `Intel® UHD Graphics` | +| `4905` `4906` `4907` `4908` | DG1 (`dg1`) | [`12.10.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.10.0_2.1.30.7z) | `Intel® Iris® Xe MAX Graphics` `Intel® SG-18M (SG1)` `Intel® Iris® Xe Graphics` | +| `4F80` `4F81` `4F82` `4F83` `4F84` `5690` `5691` `5692` `56A0` `56A1` `56A2` `56C0` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g10-a0` `dg2-g10-a1` `dg2-g10-b0` `acm-g10` `ats-m150` `dg2-g10` `dg2-g10-c0`) | [`12.55.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.0_2.1.30.7z) [`12.55.1`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.1_2.1.30.7z) [`12.55.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.4_2.1.30.7z) [`12.55.8`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.8_2.1.30.7z) | `Intel® Arc™ A770M Graphics` `Intel® Arc™ A730M Graphics` `Intel® Arc™ A550M Graphics` `Intel® Arc™ A770 Graphics` `Intel® Arc™ A750 Graphics` `Intel® Data Center GPU Flex 170` | +| `4F87` `4F88` `5693` `5694` `5695` `56A5` `56A6` `56B0` `56B1` `56BA` `56BB` `56BC` `56BD` `56C1` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g11-a0` `dg2-g11-b0` `acm-g11` `ats-m75` `dg2-g11` `dg2-g11-b1`) | [`12.56.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.0_2.1.30.7z) [`12.56.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.4_2.1.30.7z) [`12.56.5`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.5_2.1.30.7z) | `Intel® Arc™ A370M Graphics` `Intel® Arc™ A350M Graphics` `Intel® Arc™ A380 Graphics` `Intel® Arc™ A310 Graphics` `Intel® Data Center GPU Flex 140` `Intel® Arc™ A-series Graphics` | +| `4F85` `4F86` `5696` `5697` `56A3` `56A4` `56B2` `56B3` | Alchemist (`acm-g12` `dg2-g12` `dg2-g12-a0`) | [`12.57.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.57.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` | +| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` | +| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | 
[`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` | +| `7D40` `7D45` `7D60` `7D67` | Meteor Lake-M, Meteor Lake-P, Arrow Lake-U (`xe-lpg-md-a0` `mtl-m` `mtl-s` `xe-lpg-md-b0`) | [`12.70.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.0_2.1.30.7z) [`12.70.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` `Intel® UHD Graphics` | +| `7D55` `7DD5` | Meteor Lake-P (`xe-lpg-lg-a0` `mtl-p` `xe-lpg-lg-b0`) | [`12.71.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.0_2.1.30.7z) [`12.71.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` | ## Install AOT enabled IPEX (Windows only, CPython 3.11)
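
Before following the install steps, you need to know which AOT package matches your GPU. Demucs-GUI does this lookup automatically in [find_device_win.py](GUI/find_device_win.py); the snippet below is only a minimal standalone sketch of the same idea, not the actual implementation. The `SUPPORTED_2_1_30` dict is a small excerpt of the 2.1.30 table above restructured for this example, and the `wmic` query is just one illustrative way to read the PCI device ID on Windows. The link is built from the same `dwl={file}` URL pattern used in the tables.

```python
# Illustrative sketch only -- not the Demucs-GUI code. Assumptions: the table
# excerpt below, the SUPPORTED_2_1_30 name, and the use of wmic to query GPUs.
import re
import subprocess

# Excerpt of the 2.1.30+xpu table above: generation code -> (device IDs, AOT package file).
SUPPORTED_2_1_30 = {
    "12.0.0": ({"9A40", "9A49", "9A59", "9A60", "9A68", "9A70", "9A78", "FF20"}, "12.0.0_2.1.30.7z"),
    "12.70.4": ({"7D40", "7D45", "7D60", "7D67"}, "12.70.4_2.1.30.7z"),
}
AOT_LINK_FMT = "https://www.fosshub.com/Demucs-GUI-old.html?dwl={file}"


def intel_gpu_device_ids():
    """Return the PCI device IDs (hex strings like '9A49') of Intel GPUs on this Windows machine."""
    out = subprocess.run(
        ["wmic", "path", "Win32_VideoController", "get", "PNPDeviceID"],
        capture_output=True, text=True, check=True,
    ).stdout
    # A PNPDeviceID looks like: PCI\VEN_8086&DEV_9A49&SUBSYS_...
    return re.findall(r"VEN_8086&DEV_([0-9A-F]{4})", out.upper())


if __name__ == "__main__":
    for dev in intel_gpu_device_ids():
        for gen, (ids, pkg) in SUPPORTED_2_1_30.items():
            if dev in ids:
                print(f"Device {dev}: generation {gen}, download {AOT_LINK_FMT.format(file=pkg)}")
                break
        else:
            print(f"Device {dev}: no AOT package listed; JIT will be used")
```

After installing the AOT build, a quick way to check that the XPU device is usable is `python -c "import torch, intel_extension_for_pytorch; print(torch.xpu.is_available())"`, which should print `True`.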