ipex 2.1.30
CarlGao4 committed May 9, 2024
1 parent a9a675d commit ffbff07
Showing 5 changed files with 175 additions and 14 deletions.
20 changes: 16 additions & 4 deletions GUI/GuiMain.py
@@ -1,4 +1,4 @@
__version__ = "1.2a1"
__version__ = "1.2b1"

LICENSE = f"""Demucs-GUI {__version__}
Copyright (C) 2022-2024 Carl Gao, Jize Guo, Rosario S.E.
@@ -124,6 +124,7 @@
from PySide6_modified import (
Action,
DelegateCombiner,
DoNothingDelegate,
ExpandingQPlainTextEdit,
FileNameDelegate,
PercentSpinBoxDelegate,
@@ -1072,6 +1073,8 @@ def save(self, file: pathlib.Path, origin, tensor, save_func, item, finishCallba
finishCallback(shared.FileStatus.Writing, item)
shared.AddHistory("save_location", value=self.loc_input.currentText())
for stem, stem_data in main_window.mixer.mix(origin, tensor):
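# NaN or Inf in a mixed stem usually means something went wrong during separation;
# log a warning but still write the file so the user can inspect the result.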
if separator.np.isnan(stem_data).any() or separator.np.isinf(stem_data).any():
logging.warning("NaN or inf found in stem %s" % stem)
match self.encoder_group.checkedId():
case 0:
file_ext = self.file_format.currentText()
@@ -1437,6 +1440,12 @@ def resume(self):
shared.FileStatus.Cancelled,
shared.FileStatus.Failed,
]:
if self.table.item(i, 1).data(Qt.ItemDataRole.UserRole)[0] in [
shared.FileStatus.Cancelled,
shared.FileStatus.Failed,
]:
self.queue_length += 1
main_window.updateQueueLength()
self.table.item(i, 1).setData(Qt.ItemDataRole.UserRole, [shared.FileStatus.Queued])
self.table.item(i, 1).setData(ProgressDelegate.TextRole, "Queued")

@@ -1541,6 +1550,9 @@ def __init__(self):
PercentSpinBoxDelegate(minimum=-500, maximum=500, step=1),
lambda x: x.column() > 1 and x.row() >= len(main_window.separator.sources) * 3,
)
self.delegate.addDelegate(
DoNothingDelegate(), lambda x: x.column() >= 1 and x.row() < len(main_window.separator.sources) * 3
)
self.outputs_table.setItemDelegate(self.delegate)

default_preset = shared.GetHistory("default_preset", self.preset_stem_key, default=None, autoset=False)
@@ -1751,9 +1763,9 @@ def applyPreset(self):
for stem, sources, enabled in preset[1]:
# Calculate weights first
# The order of sources is not guaranteed, so we need to use the index of the source
weights = [stem, sources["origin"]]
weights = [stem, f"{sources['origin']}%\u3000"]
for source in main_window.separator.sources:
weights.append(sources[source])
weights.append(f"{sources[source]}%\u3000")
self.outputs_table.addRow(weights, enabled)

def duplicateSelected(self):
@@ -1949,7 +1961,7 @@ def startSeparation(self):
self.start_button.setEnabled(True)
main_window.save_options.encoder_ffmpeg_box.setEnabled(True)
main_window.setStatusText.emit("No more file to separate")
separator.empty_cuda_cache()
separator.empty_cache()
return
file = main_window.file_queue.table.item(index, 0).data(Qt.ItemDataRole.UserRole)
item = main_window.file_queue.table.item(index, 1)
96 changes: 94 additions & 2 deletions GUI/find_device_win.py
@@ -77,7 +77,77 @@
"12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"},
"12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"},
"12.59.0": {"4F89", "56A9", "56AA"},
}
},
"2.1.30": {
"12.0.0": {"9A40", "9A49", "9A59", "9A60", "9A68", "9A70", "9A78", "FF20"},
"12.1.0": {"4C80", "4C8A", "4C8B", "4C8C", "4C90", "4C9A"},
"12.2.0": {
"4680",
"4682",
"4688",
"468A",
"4690",
"4692",
"4693",
"A780",
"A781",
"A782",
"A783",
"A788",
"A789",
"A78B",
},
"12.3.0": {
"4626",
"4628",
"462A",
"46A0",
"46A1",
"46A2",
"46A3",
"46A6",
"46A8",
"46AA",
"46B0",
"46B1",
"46B2",
"46B3",
"46C0",
"46C1",
"46C2",
"46C3",
"A720",
"A721",
"A7A0",
"A7A1",
"A7A8",
"A7A9",
},
"12.4.0": {"46D0", "46D1", "46D2"},
"12.10.0": {"4905", "4906", "4907", "4908"},
"12.55.8": {"4F80", "4F81", "4F82", "4F83", "4F84", "5690", "5691", "5692", "56A0", "56A1", "56A2", "56C0"},
"12.56.5": {
"4F87",
"4F88",
"5693",
"5694",
"5695",
"56A5",
"56A6",
"56B0",
"56B1",
"56BA",
"56BB",
"56BC",
"56BD",
"56C1",
},
"12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"},
"12.59.0": {"4F89", "56A9", "56AA"},
"12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"},
"12.70.4": {"7D40", "7D45", "7D60", "7D67"},
"12.71.4": {"7D55", "7DD5"},
},
}

AOT_link_fmt = "https://www.fosshub.com/Demucs-GUI-old.html?dwl={file}"
@@ -99,7 +169,29 @@
"12.57.0": "12.57.0_acm-g12_dg2-g12_dg2-g12-a0.7z",
"12.58.0": "12.58.0_acm-g20_dg2-g20.7z",
"12.59.0": "12.59.0_acm-g21_dg2-g21.7z",
}
},
"2.1.30": {
"12.0.0": "12.0.0_2.1.30.7z",
"12.1.0": "12.1.0_2.1.30.7z",
"12.2.0": "12.2.0_2.1.30.7z",
"12.3.0": "12.3.0_2.1.30.7z",
"12.4.0": "12.4.0_2.1.30.7z",
"12.10.0": "12.10.0_2.1.30.7z",
"12.55.0": "12.55.0_2.1.30.7z",
"12.55.1": "12.55.1_2.1.30.7z",
"12.55.4": "12.55.4_2.1.30.7z",
"12.55.8": "12.55.8_2.1.30.7z",
"12.56.0": "12.56.0_2.1.30.7z",
"12.56.4": "12.56.4_2.1.30.7z",
"12.56.5": "12.56.5_2.1.30.7z",
"12.57.0": "12.57.0_2.1.30.7z",
"12.58.0": "12.58.0_2.1.30.7z",
"12.59.0": "12.59.0_2.1.30.7z",
"12.70.0": "12.70.0_2.1.30.7z",
"12.70.4": "12.70.4_2.1.30.7z",
"12.71.0": "12.71.0_2.1.30.7z",
"12.71.4": "12.71.4_2.1.30.7z",
},
}
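
# A minimal sketch (not part of this module) of how the tables above could be
# used to pick the AOT package for a GPU: "supported_devices" and "aot_files"
# are placeholder names standing in for the two dicts defined above.
def find_aot_package(ipex_version: str, pci_device_id: str):
    for generation, device_ids in supported_devices[ipex_version].items():
        if pci_device_id.upper() in device_ids:
            return AOT_link_fmt.format(file=aot_files[ipex_version][generation])
    return None

# e.g. find_aot_package("2.1.30", "56A0") -> download link for 12.55.8_2.1.30.7z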

gpus = []
19 changes: 16 additions & 3 deletions GUI/separator.py
@@ -37,6 +37,7 @@

default_device = 0
used_cuda = False
used_xpu = False
has_Intel = False
Intel_JIT_only = False

@@ -50,8 +51,9 @@ class ModelSourceNameUnsupportedError(Exception):

@shared.thread_wrapper(daemon=True)
def starter(update_status: tp.Callable[[str], None], finish: tp.Callable[[float], None]):
global torch, demucs, audio, has_Intel, Intel_JIT_only
global torch, demucs, audio, has_Intel, Intel_JIT_only, np
import torch
import numpy as np

for i in range(5):
try:
@@ -237,10 +239,13 @@ def autoListModels():
return models, infos, each_repos


def empty_cuda_cache():
def empty_cache():
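# Release cached allocator memory on whichever backends have been used this
# session; the repeated calls are a best-effort attempt to return as much
# memory as possible to the driver.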
if used_cuda:
for _ in range(10):
torch.cuda.empty_cache()
if used_xpu:
for _ in range(10):
torch.xpu.empty_cache()


class Separator:
@@ -454,13 +459,16 @@ def separate(
logging.info("Start separating audio: %s" % file.name)
logging.info("Parameters: segment=%.2f overlap=%.2f shifts=%d" % (segment, overlap, shifts))
logging.info("Device: %s" % device)
global used_cuda
global used_cuda, used_xpu
if device.startswith("cuda"):
used_cuda = True
if device.startswith("xpu"):
used_xpu = True
try:
setStatus(shared.FileStatus.Reading, item)
wav = audio.read_audio(file, self.separator.model.samplerate, self.updateStatus)
assert wav is not None
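# Fail early if the decoded audio already contains NaN or Inf samples; the
# surrounding except block will mark the file as failed.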
assert (np.isnan(wav).sum() == 0) and (np.isinf(wav).sum() == 0), "Audio contains NaN or Inf"
except Exception:
finishCallback(shared.FileStatus.Failed, item)
self.separating = False
@@ -476,12 +484,15 @@ def separate(
self.time_hists = []
self.last_update_eta = 0

self.separator.model.to("cpu")  # To avoid moving between different GPUs, which may cause errors

try:
self.updateStatus("Separating audio: %s" % file.name)
self.separator.update_parameter(
device=device, segment=segment, shifts=shifts, overlap=overlap, callback=self.updateProgress
)
wav_torch = torch.from_numpy(wav).clone().transpose(0, 1)
assert (not wav_torch.isnan().any()) and (not wav_torch.isinf().any()), "Audio contains NaN or Inf"
src_channels = wav_torch.shape[0]
logging.info("Running separation...")
self.time_hists.append((time.time(), 0))
@@ -508,6 +519,8 @@
finishCallback(shared.FileStatus.Failed, item)
self.separating = False
return
finally:
self.separator.model.to("cpu")
logging.info("Saving separated audio...")
save_callback(file, wav_torch, out, self.save_callback, item, finishCallback)
self.separating = False
25 changes: 25 additions & 0 deletions GUI/shared.py
@@ -85,6 +85,31 @@
historyLock = threading.Lock()


if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS") and sys.platform == "win32":
# Popen should be wrapped to avoid WinError 50
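# (Note on the workaround: in a windowed PyInstaller one-file build there is no
# console, so std handles left at their defaults may be invalid and spawning a
# child process can fail with WinError 50, "The request is not supported".
# Whenever the caller redirects one of stdin/stdout/stderr, the wrapper below
# redirects the remaining two to pipes as well.)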
subprocess._Popen = subprocess.Popen

def wrapped_Popen(*args, **kwargs):
if "stdout" in kwargs and kwargs["stdout"] is not None:
if "stderr" not in kwargs or kwargs["stderr"] is None:
kwargs["stderr"] = subprocess.PIPE
if "stdin" not in kwargs or kwargs["stdin"] is None:
kwargs["stdin"] = subprocess.PIPE
if "stderr" in kwargs and kwargs["stderr"] is not None:
if "stdout" not in kwargs or kwargs["stdout"] is None:
kwargs["stdout"] = subprocess.PIPE
if "stdin" not in kwargs or kwargs["stdin"] is None:
kwargs["stdin"] = subprocess.PIPE
if "stdin" in kwargs and kwargs["stdin"] is not None:
if "stdout" not in kwargs or kwargs["stdout"] is None:
kwargs["stdout"] = subprocess.PIPE
if "stderr" not in kwargs or kwargs["stderr"] is None:
kwargs["stderr"] = subprocess.PIPE
return subprocess._Popen(*args, **kwargs)

subprocess.Popen = wrapped_Popen


def HSize(size):
s = size
t = 0
29 changes: 24 additions & 5 deletions MKL-AOT.md
@@ -1,14 +1,17 @@
# Notes for users using MKL release (Intel GPU)

**THIS DOCUMENT IS OUT OF DATE FOR DEMUCS-GUI >= 1.2b1**

## Why is the separation process so slow?

The official package of IPEX (Intel Extension for PyTorch) is not built with AOT (Ahead-Of-Time) compilation, only with JIT (Just-In-Time) support (the Demucs-GUI release is packed with this package as well). This means that if you use an Intel GPU, the first separation after each start of Demucs-GUI will take a long time (normally more than 5 minutes) to compile the model. Please note that if you restart Demucs-GUI, the model has to be recompiled. JIT may also fail sometimes, in which case you need to restart Demucs-GUI.

This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (actually, 16) will make the package too large (20GB+). But I've built the AOT binaries separately for each architecture and uploaded them to FossHUB. The binaries are built for `Windows x86_64`, `Python 3.11`, `torch 2.1.0a0+git7bcf7da` (patched by Intel), `intel_extension_for_pytorch 2.1.10+git45400a8`.
This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (actually, 16) will make the package too large (20GB+). But I've built the AOT binaries separately for each architecture and uploaded them to FossHUB.

I've built two different versions of `intel_extension_for_pytorch`. The binaries are built for `Windows x86_64` and `Python 3.11`. Demucs-GUI 1.1a2 to 1.2a1 are packed with `torch 2.1.0a0+git7bcf7da` (patched by Intel) and `intel_extension_for_pytorch 2.1.10+git45400a8`, while from 1.2b1 onwards it is packed with `torch 2.1.0` and `intel_extension_for_pytorch 2.1.30+xpu`. You can also install `2.1.30+xpu` from [my own redistribution GitHub repo](https://github.com/CarlGao4/ipex-wheel/releases). The support list for each version is shown below. Please note that the support list depends on the version of `intel_extension_for_pytorch`, not on the version of Demucs-GUI; the Demucs-GUI versions are listed only to indicate which `intel_extension_for_pytorch` each release is packed with. If you are running from source code, you can use any version of `intel_extension_for_pytorch` that is compatible with your GPU.

The tables below are generated by running `ocloc.exe` with the `device` argument set to every ID from 0x0000 to 0xFFFF. Theoretically, all of these GPUs should be supported (even those that are not released yet), and even some unlisted GPUs may work.
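
As a rough illustration of how such a table can be reproduced (the exact script is not part of this repository, and the `-device` / exit-code behaviour of `ocloc.exe` is an assumption here), one could loop over every possible device ID and keep the ones the compiler accepts:

```python
# Sketch only: assumes ocloc.exe is on PATH, accepts a hex PCI device ID via
# "-device", and returns a non-zero exit code for IDs it cannot compile for.
import pathlib
import subprocess

kernel = pathlib.Path("empty.cl")
kernel.write_text("__kernel void noop() {}\n")  # trivial kernel to compile

supported_ids = []
for device_id in range(0x0000, 0x10000):  # 65536 IDs, so this takes a while
    result = subprocess.run(
        ["ocloc.exe", "compile", "-file", str(kernel), "-device", f"0x{device_id:04X}"],
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        supported_ids.append(f"{device_id:04X}")

print(f"{len(supported_ids)} device IDs compiled successfully")
```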

Following GPUs are supported (for details, please see [find_device_win.py](GUI/find_device_win.py)):
### 2.1.10+xpu (Demucs-GUI 1.1a2 to 1.2a1)
The following GPUs are supported with `2.1.10+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)):
| PCI ID (Only the device part) | Architecture | Generation Code | Display Name |
| ----------------------------- | ------------ | --------------- | ------------ |
| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_tgl_tgllp.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
@@ -23,7 +26,23 @@ Following GPUs are supported (for details, please see [find_device_win.py](GUI/f
| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_acm-g20_dg2-g20.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |
| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | [`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_acm-g21_dg2-g21.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |

The table above is generated by running `ocloc.exe` with the `device` argument set to every ID from 0x0000 to 0xFFFF. Theoretically, all these GPUs should be supported (even if they are not released yet), and even some unlisted GPUs can be supported.
### 2.1.30+xpu (Demucs-GUI 1.2b1 and later)
The following GPUs are supported with `2.1.30+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)):
| PCI ID (Only the device part) | Architecture | Generation Code | Display Name |
| ----------------------------- | ------------ | --------------- | ------------ |
| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
| `4C80` `4C8A` `4C8B` `4C8C` `4C90` `4C9A` | Rocket Lake (`rkl`) | [`12.1.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.1.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4680` `4682` `4688` `468A` `4690` `4692` `4693` `A780` `A781` `A782` `A783` `A788` `A789` `A78B` | Alder Lake-S, Raptor Lake-S (`adl-s`) | [`12.2.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.2.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4626` `4628` `462A` `46A0` `46A1` `46A2` `46A3` `46A6` `46A8` `46AA` `46B0` `46B1` `46B2` `46B3` `46C0` `46C1` `46C2` `46C3` `A720` `A721` `A7A0` `A7A1` `A7A8` `A7A9` | Alder Lake, Raptor Lake-P (`adl-p`) | [`12.3.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.3.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
| `46D0` `46D1` `46D2` | Alder Lake-N (`adl-n`) | [`12.4.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.4.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4905` `4906` `4907` `4908` | DG1 (`dg1`) | [`12.10.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.10.0_2.1.30.7z) | `Intel® Iris® Xe MAX Graphics` `Intel® SG-18M (SG1)` `Intel® Iris® Xe Graphics` |
| `4F80` `4F81` `4F82` `4F83` `4F84` `5690` `5691` `5692` `56A0` `56A1` `56A2` `56C0` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g10-a0` `dg2-g10-a1` `dg2-g10-b0` `acm-g10` `ats-m150` `dg2-g10` `dg2-g10-c0`) | [`12.55.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.0_2.1.30.7z) [`12.55.1`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.1_2.1.30.7z) [`12.55.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.4_2.1.30.7z) [`12.55.8`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.8_2.1.30.7z) | `Intel® Arc™ A770M Graphics` `Intel® Arc™ A730M Graphics` `Intel® Arc™ A550M Graphics` `Intel® Arc™ A770 Graphics` `Intel® Arc™ A750 Graphics` `Intel® Data Center GPU Flex 170` |
| `4F87` `4F88` `5693` `5694` `5695` `56A5` `56A6` `56B0` `56B1` `56BA` `56BB` `56BC` `56BD` `56C1` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g11-a0` `dg2-g11-b0` `acm-g11` `ats-m75` `dg2-g11` `dg2-g11-b1`) | [`12.56.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.0_2.1.30.7z) [`12.56.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.4_2.1.30.7z) [`12.56.5`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.5_2.1.30.7z) | `Intel® Arc™ A370M Graphics` `Intel® Arc™ A350M Graphics` `Intel® Arc™ A380 Graphics` `Intel® Arc™ A310 Graphics` `Intel® Data Center GPU Flex 140` `Intel® Arc™ A-series Graphics` |
| `4F85` `4F86` `5696` `5697` `56A3` `56A4` `56B2` `56B3` | Alchemist (`acm-g12` `dg2-g12` `dg2-g12-a0`) | [`12.57.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.57.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | [`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `7D40` `7D45` `7D60` `7D67` | Meteor Lake-M, Meteor Lake-P, Arrow Lake-U (`xe-lpg-md-a0` `mtl-m` `mtl-s` `xe-lpg-md-b0`) | [`12.70.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.0_2.1.30.7z) [`12.70.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` `Intel® UHD Graphics` |
| `7D55` `7DD5` | Meteor Lake-P (`xe-lpg-lg-a0` `mtl-p` `xe-lpg-lg-b0`) | [`12.71.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.0_2.1.30.7z) [`12.71.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` |

## Install AOT-enabled IPEX (Windows only, CPython 3.11)

Expand Down
