ipex 2.1.30
CarlGao4 committed May 9, 2024
1 parent a9a675d commit ffbff07
Showing 5 changed files with 175 additions and 14 deletions.
20 changes: 16 additions & 4 deletions GUI/GuiMain.py
@@ -1,4 +1,4 @@
__version__ = "1.2a1"
__version__ = "1.2b1"

LICENSE = f"""Demucs-GUI {__version__}
Copyright (C) 2022-2024 Carl Gao, Jize Guo, Rosario S.E.
@@ -124,6 +124,7 @@
from PySide6_modified import (
Action,
DelegateCombiner,
DoNothingDelegate,
ExpandingQPlainTextEdit,
FileNameDelegate,
PercentSpinBoxDelegate,
@@ -1072,6 +1073,8 @@ def save(self, file: pathlib.Path, origin, tensor, save_func, item, finishCallba
finishCallback(shared.FileStatus.Writing, item)
shared.AddHistory("save_location", value=self.loc_input.currentText())
for stem, stem_data in main_window.mixer.mix(origin, tensor):
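# NaN or Inf in a mixed stem usually means something went wrong during separation;
# log a warning but still write the file so the user can inspect the result.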
if separator.np.isnan(stem_data).any() or separator.np.isinf(stem_data).any():
logging.warning("NaN or inf found in stem %s" % stem)
match self.encoder_group.checkedId():
case 0:
file_ext = self.file_format.currentText()
@@ -1437,6 +1440,12 @@ def resume(self):
shared.FileStatus.Cancelled,
shared.FileStatus.Failed,
]:
if self.table.item(i, 1).data(Qt.ItemDataRole.UserRole)[0] in [
shared.FileStatus.Cancelled,
shared.FileStatus.Failed,
]:
self.queue_length += 1
main_window.updateQueueLength()
self.table.item(i, 1).setData(Qt.ItemDataRole.UserRole, [shared.FileStatus.Queued])
self.table.item(i, 1).setData(ProgressDelegate.TextRole, "Queued")

@@ -1541,6 +1550,9 @@ def __init__(self):
PercentSpinBoxDelegate(minimum=-500, maximum=500, step=1),
lambda x: x.column() > 1 and x.row() >= len(main_window.separator.sources) * 3,
)
self.delegate.addDelegate(
DoNothingDelegate(), lambda x: x.column() >= 1 and x.row() < len(main_window.separator.sources) * 3
)
self.outputs_table.setItemDelegate(self.delegate)

default_preset = shared.GetHistory("default_preset", self.preset_stem_key, default=None, autoset=False)
@@ -1751,9 +1763,9 @@ def applyPreset(self):
for stem, sources, enabled in preset[1]:
# Calculate weights first
# The order of sources is not guaranteed, so we need to use the index of the source
weights = [stem, sources["origin"]]
weights = [stem, f"{sources['origin']}%\u3000"]
for source in main_window.separator.sources:
weights.append(sources[source])
weights.append(f"{sources[source]}%\u3000")
self.outputs_table.addRow(weights, enabled)

def duplicateSelected(self):
@@ -1949,7 +1961,7 @@ def startSeparation(self):
self.start_button.setEnabled(True)
main_window.save_options.encoder_ffmpeg_box.setEnabled(True)
main_window.setStatusText.emit("No more file to separate")
separator.empty_cuda_cache()
separator.empty_cache()
return
file = main_window.file_queue.table.item(index, 0).data(Qt.ItemDataRole.UserRole)
item = main_window.file_queue.table.item(index, 1)
96 changes: 94 additions & 2 deletions GUI/find_device_win.py
@@ -77,7 +77,77 @@
"12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"},
"12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"},
"12.59.0": {"4F89", "56A9", "56AA"},
}
},
"2.1.30": {
"12.0.0": {"9A40", "9A49", "9A59", "9A60", "9A68", "9A70", "9A78", "FF20"},
"12.1.0": {"4C80", "4C8A", "4C8B", "4C8C", "4C90", "4C9A"},
"12.2.0": {
"4680",
"4682",
"4688",
"468A",
"4690",
"4692",
"4693",
"A780",
"A781",
"A782",
"A783",
"A788",
"A789",
"A78B",
},
"12.3.0": {
"4626",
"4628",
"462A",
"46A0",
"46A1",
"46A2",
"46A3",
"46A6",
"46A8",
"46AA",
"46B0",
"46B1",
"46B2",
"46B3",
"46C0",
"46C1",
"46C2",
"46C3",
"A720",
"A721",
"A7A0",
"A7A1",
"A7A8",
"A7A9",
},
"12.4.0": {"46D0", "46D1", "46D2"},
"12.10.0": {"4905", "4906", "4907", "4908"},
"12.55.8": {"4F80", "4F81", "4F82", "4F83", "4F84", "5690", "5691", "5692", "56A0", "56A1", "56A2", "56C0"},
"12.56.5": {
"4F87",
"4F88",
"5693",
"5694",
"5695",
"56A5",
"56A6",
"56B0",
"56B1",
"56BA",
"56BB",
"56BC",
"56BD",
"56C1",
},
"12.57.0": {"4F85", "4F86", "5696", "5697", "56A3", "56A4", "56B2", "56B3"},
"12.59.0": {"4F89", "56A9", "56AA"},
"12.58.0": {"4F8C", "5698", "5699", "569A", "56A7", "56A8"},
"12.70.4": {"7D40", "7D45", "7D60", "7D67"},
"12.71.4": {"7D55", "7DD5"},
},
}

AOT_link_fmt = "https://www.fosshub.com/Demucs-GUI-old.html?dwl={file}"
@@ -99,7 +169,29 @@
"12.57.0": "12.57.0_acm-g12_dg2-g12_dg2-g12-a0.7z",
"12.58.0": "12.58.0_acm-g20_dg2-g20.7z",
"12.59.0": "12.59.0_acm-g21_dg2-g21.7z",
}
},
"2.1.30": {
"12.0.0": "12.0.0_2.1.30.7z",
"12.1.0": "12.1.0_2.1.30.7z",
"12.2.0": "12.2.0_2.1.30.7z",
"12.3.0": "12.3.0_2.1.30.7z",
"12.4.0": "12.4.0_2.1.30.7z",
"12.10.0": "12.10.0_2.1.30.7z",
"12.55.0": "12.55.0_2.1.30.7z",
"12.55.1": "12.55.1_2.1.30.7z",
"12.55.4": "12.55.4_2.1.30.7z",
"12.55.8": "12.55.8_2.1.30.7z",
"12.56.0": "12.56.0_2.1.30.7z",
"12.56.4": "12.56.4_2.1.30.7z",
"12.56.5": "12.56.5_2.1.30.7z",
"12.57.0": "12.57.0_2.1.30.7z",
"12.58.0": "12.58.0_2.1.30.7z",
"12.59.0": "12.59.0_2.1.30.7z",
"12.70.0": "12.70.0_2.1.30.7z",
"12.70.4": "12.70.4_2.1.30.7z",
"12.71.0": "12.71.0_2.1.30.7z",
"12.71.4": "12.71.4_2.1.30.7z",
},
}
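
# A minimal sketch (not part of this module) of how the tables above could be
# used to pick the AOT package for a GPU: "supported_devices" and "aot_files"
# are placeholder names standing in for the two dicts defined above.
def find_aot_package(ipex_version: str, pci_device_id: str):
    for generation, device_ids in supported_devices[ipex_version].items():
        if pci_device_id.upper() in device_ids:
            return AOT_link_fmt.format(file=aot_files[ipex_version][generation])
    return None

# e.g. find_aot_package("2.1.30", "56A0") -> download link for 12.55.8_2.1.30.7z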

gpus = []
19 changes: 16 additions & 3 deletions GUI/separator.py
@@ -37,6 +37,7 @@

default_device = 0
used_cuda = False
used_xpu = False
has_Intel = False
Intel_JIT_only = False

@@ -50,8 +51,9 @@ class ModelSourceNameUnsupportedError(Exception):

@shared.thread_wrapper(daemon=True)
def starter(update_status: tp.Callable[[str], None], finish: tp.Callable[[float], None]):
global torch, demucs, audio, has_Intel, Intel_JIT_only
global torch, demucs, audio, has_Intel, Intel_JIT_only, np
import torch
import numpy as np

for i in range(5):
try:
@@ -237,10 +239,13 @@ def autoListModels():
return models, infos, each_repos


def empty_cuda_cache():
def empty_cache():
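# Release cached allocator memory on whichever backends have been used this
# session; the repeated calls are a best-effort attempt to return as much
# memory as possible to the driver.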
if used_cuda:
for _ in range(10):
torch.cuda.empty_cache()
if used_xpu:
for _ in range(10):
torch.xpu.empty_cache()


class Separator:
@@ -454,13 +459,16 @@ def separate(
logging.info("Start separating audio: %s" % file.name)
logging.info("Parameters: segment=%.2f overlap=%.2f shifts=%d" % (segment, overlap, shifts))
logging.info("Device: %s" % device)
global used_cuda
global used_cuda, used_xpu
if device.startswith("cuda"):
used_cuda = True
if device.startswith("xpu"):
used_xpu = True
try:
setStatus(shared.FileStatus.Reading, item)
wav = audio.read_audio(file, self.separator.model.samplerate, self.updateStatus)
assert wav is not None
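# Fail early if the decoded audio already contains NaN or Inf samples; the
# surrounding except block will mark the file as failed.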
assert (np.isnan(wav).sum() == 0) and (np.isinf(wav).sum() == 0), "Audio contains NaN or Inf"
except Exception:
finishCallback(shared.FileStatus.Failed, item)
self.separating = False
@@ -476,12 +484,15 @@ def separate(
self.time_hists = []
self.last_update_eta = 0

self.separator.model.to("cpu")  # To avoid moving between different GPUs, which may cause errors

try:
self.updateStatus("Separating audio: %s" % file.name)
self.separator.update_parameter(
device=device, segment=segment, shifts=shifts, overlap=overlap, callback=self.updateProgress
)
wav_torch = torch.from_numpy(wav).clone().transpose(0, 1)
assert (not wav_torch.isnan().any()) and (not wav_torch.isinf().any()), "Audio contains NaN or Inf"
src_channels = wav_torch.shape[0]
logging.info("Running separation...")
self.time_hists.append((time.time(), 0))
@@ -508,6 +519,8 @@
finishCallback(shared.FileStatus.Failed, item)
self.separating = False
return
finally:
self.separator.model.to("cpu")
logging.info("Saving separated audio...")
save_callback(file, wav_torch, out, self.save_callback, item, finishCallback)
self.separating = False
25 changes: 25 additions & 0 deletions GUI/shared.py
@@ -85,6 +85,31 @@
historyLock = threading.Lock()


if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS") and sys.platform == "win32":
# Popen should be wrapped to avoid WinError 50
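# (Note on the workaround: in a windowed PyInstaller one-file build there is no
# console, so std handles left at their defaults may be invalid and spawning a
# child process can fail with WinError 50, "The request is not supported".
# Whenever the caller redirects one of stdin/stdout/stderr, the wrapper below
# redirects the remaining two to pipes as well.)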
subprocess._Popen = subprocess.Popen

def wrapped_Popen(*args, **kwargs):
if "stdout" in kwargs and kwargs["stdout"] is not None:
if "stderr" not in kwargs or kwargs["stderr"] is None:
kwargs["stderr"] = subprocess.PIPE
if "stdin" not in kwargs or kwargs["stdin"] is None:
kwargs["stdin"] = subprocess.PIPE
if "stderr" in kwargs and kwargs["stderr"] is not None:
if "stdout" not in kwargs or kwargs["stdout"] is None:
kwargs["stdout"] = subprocess.PIPE
if "stdin" not in kwargs or kwargs["stdin"] is None:
kwargs["stdin"] = subprocess.PIPE
if "stdin" in kwargs and kwargs["stdin"] is not None:
if "stdout" not in kwargs or kwargs["stdout"] is None:
kwargs["stdout"] = subprocess.PIPE
if "stderr" not in kwargs or kwargs["stderr"] is None:
kwargs["stderr"] = subprocess.PIPE
return subprocess._Popen(*args, **kwargs)

subprocess.Popen = wrapped_Popen


def HSize(size):
s = size
t = 0
29 changes: 24 additions & 5 deletions MKL-AOT.md
@@ -1,14 +1,17 @@
# Notes for users using MKL release (Intel GPU)

**THIS DOCUMENT IS OUT OF DATE FOR DEMUCS-GUI >= 1.2b1**

## Why is the separation process so slow?

The official package of IPEX (Intel Extension for PyTorch) is not built with AOT (Ahead-Of-Time) compilation, only with JIT (Just-In-Time) support (the Demucs-GUI release is packed with this package as well). This means that if you use an Intel GPU, the first separation after each start of Demucs-GUI will take a long time (normally more than 5 minutes) to compile the model. Please note that if you restart Demucs-GUI, the model has to be recompiled. JIT may also fail sometimes, in which case you need to restart Demucs-GUI.

This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (actually, 16) will make the package too large (20GB+). But I've built the AOT binaries separately for each architecture and uploaded them to FossHUB. The binaries are built for `Windows x86_64`, `Python 3.11`, `torch 2.1.0a0+git7bcf7da` (patched by Intel), `intel_extension_for_pytorch 2.1.10+git45400a8`.
This is because AOT binaries have to be compiled separately for each GPU architecture, and including all architectures (actually, 16) will make the package too large (20GB+). But I've built the AOT binaries separately for each architecture and uploaded them to FossHUB.

I've built two different versions of `intel_extension_for_pytorch`. The binaries are built for `Windows x86_64` and `Python 3.11`. Demucs-GUI 1.1a2 to 1.2a1 are packed with `torch 2.1.0a0+git7bcf7da` (patched by Intel) and `intel_extension_for_pytorch 2.1.10+git45400a8`, while from 1.2b1 onwards it is packed with `torch 2.1.0` and `intel_extension_for_pytorch 2.1.30+xpu`. You can also install `2.1.30+xpu` from [my own redistribution GitHub repo](https://github.com/CarlGao4/ipex-wheel/releases). The support list for each version is shown below. Please note that the support list depends on the version of `intel_extension_for_pytorch`, not on the version of Demucs-GUI; the Demucs-GUI versions are listed only to indicate which `intel_extension_for_pytorch` each release is packed with. If you are running from source code, you can use any version of `intel_extension_for_pytorch` that is compatible with your GPU.

The tables below are generated by running `ocloc.exe` with the `device` argument set to every ID from 0x0000 to 0xFFFF. Theoretically, all of these GPUs should be supported (even those that are not released yet), and even some unlisted GPUs may work.
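
As a rough illustration of how such a table can be reproduced (the exact script is not part of this repository, and the `-device` / exit-code behaviour of `ocloc.exe` is an assumption here), one could loop over every possible device ID and keep the ones the compiler accepts:

```python
# Sketch only: assumes ocloc.exe is on PATH, accepts a hex PCI device ID via
# "-device", and returns a non-zero exit code for IDs it cannot compile for.
import pathlib
import subprocess

kernel = pathlib.Path("empty.cl")
kernel.write_text("__kernel void noop() {}\n")  # trivial kernel to compile

supported_ids = []
for device_id in range(0x0000, 0x10000):  # 65536 IDs, so this takes a while
    result = subprocess.run(
        ["ocloc.exe", "compile", "-file", str(kernel), "-device", f"0x{device_id:04X}"],
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        supported_ids.append(f"{device_id:04X}")

print(f"{len(supported_ids)} device IDs compiled successfully")
```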

Following GPUs are supported (for details, please see [find_device_win.py](GUI/find_device_win.py)):
### 2.1.10+xpu (Demucs-GUI 1.1a2 to 1.2a1)
The following GPUs are supported with `2.1.10+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)):
| PCI ID (Only the device part) | Architecture | Generation Code | Display Name |
| ----------------------------- | ------------ | --------------- | ------------ |
| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_tgl_tgllp.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
@@ -23,7 +26,23 @@ Following GPUs are supported (for details, please see [find_device_win.py](GUI/f
| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_acm-g20_dg2-g20.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |
| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | [`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_acm-g21_dg2-g21.7z) | ~~`Intel® Arc™ A-series Graphics` (future)~~ |

The table above is generated by running `ocloc.exe` with the `device` argument set to every ID from 0x0000 to 0xFFFF. Theoretically, all these GPUs should be supported (even if they are not released yet), and even some unlisted GPUs can be supported.
### 2.1.30+xpu (Demucs-GUI 1.2b1 and later)
The following GPUs are supported with `2.1.30+xpu` (for details, please see [find_device_win.py](GUI/find_device_win.py)):
| PCI ID (Only the device part) | Architecture | Generation Code | Display Name |
| ----------------------------- | ------------ | --------------- | ------------ |
| `9A40` `9A49` `9A59` `9A60` `9A68` `9A70` `9A78` `FF20` | Tiger Lake (`tgl` `tgllp`) | [`12.0.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.0.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
| `4C80` `4C8A` `4C8B` `4C8C` `4C90` `4C9A` | Rocket Lake (`rkl`) | [`12.1.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.1.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4680` `4682` `4688` `468A` `4690` `4692` `4693` `A780` `A781` `A782` `A783` `A788` `A789` `A78B` | Alder Lake-S, Raptor Lake-S (`adl-s`) | [`12.2.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.2.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4626` `4628` `462A` `46A0` `46A1` `46A2` `46A3` `46A6` `46A8` `46AA` `46B0` `46B1` `46B2` `46B3` `46C0` `46C1` `46C2` `46C3` `A720` `A721` `A7A0` `A7A1` `A7A8` `A7A9` | Alder Lake, Raptor Lake-P (`adl-p`) | [`12.3.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.3.0_2.1.30.7z) | `Intel® UHD Graphics` `Intel® Iris® Xe Graphics` |
| `46D0` `46D1` `46D2` | Alder Lake-N (`adl-n`) | [`12.4.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.4.0_2.1.30.7z) | `Intel® UHD Graphics` |
| `4905` `4906` `4907` `4908` | DG1 (`dg1`) | [`12.10.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.10.0_2.1.30.7z) | `Intel® Iris® Xe MAX Graphics` `Intel® SG-18M (SG1)` `Intel® Iris® Xe Graphics` |
| `4F80` `4F81` `4F82` `4F83` `4F84` `5690` `5691` `5692` `56A0` `56A1` `56A2` `56C0` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g10-a0` `dg2-g10-a1` `dg2-g10-b0` `acm-g10` `ats-m150` `dg2-g10` `dg2-g10-c0`) | [`12.55.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.0_2.1.30.7z) [`12.55.1`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.1_2.1.30.7z) [`12.55.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.4_2.1.30.7z) [`12.55.8`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.55.8_2.1.30.7z) | `Intel® Arc™ A770M Graphics` `Intel® Arc™ A730M Graphics` `Intel® Arc™ A550M Graphics` `Intel® Arc™ A770 Graphics` `Intel® Arc™ A750 Graphics` `Intel® Data Center GPU Flex 170` |
| `4F87` `4F88` `5693` `5694` `5695` `56A5` `56A6` `56B0` `56B1` `56BA` `56BB` `56BC` `56BD` `56C1` | Alchemist, Intel® Data Center GPU Flex Series (`dg2-g11-a0` `dg2-g11-b0` `acm-g11` `ats-m75` `dg2-g11` `dg2-g11-b1`) | [`12.56.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.0_2.1.30.7z) [`12.56.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.4_2.1.30.7z) [`12.56.5`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.56.5_2.1.30.7z) | `Intel® Arc™ A370M Graphics` `Intel® Arc™ A350M Graphics` `Intel® Arc™ A380 Graphics` `Intel® Arc™ A310 Graphics` `Intel® Data Center GPU Flex 140` `Intel® Arc™ A-series Graphics` |
| `4F85` `4F86` `5696` `5697` `56A3` `56A4` `56B2` `56B3` | Alchemist (`acm-g12` `dg2-g12` `dg2-g12-a0`) | [`12.57.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.57.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `4F8C` `5698` `5699` `569A` `56A7` `56A8` | Alchemist (`acm-g20` `dg2-g20`) | [`12.58.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.58.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `4F89` `56A9` `56AA` | Alchemist (`acm-g21` `dg2-g21`) | [`12.59.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.59.0_2.1.30.7z) | `Intel® Arc™ A-series Graphics` |
| `7D40` `7D45` `7D60` `7D67` | Meteor Lake-M, Meteor Lake-P, Arrow Lake-U (`xe-lpg-md-a0` `mtl-m` `mtl-s` `xe-lpg-md-b0`) | [`12.70.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.0_2.1.30.7z) [`12.70.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.70.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` `Intel® UHD Graphics` |
| `7D55` `7DD5` | Meteor Lake-P (`xe-lpg-lg-a0` `mtl-p` `xe-lpg-lg-b0`) | [`12.71.0`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.0_2.1.30.7z) [`12.71.4`](https://www.fosshub.com/Demucs-GUI-old.html?dwl=12.71.4_2.1.30.7z) | `Intel® Iris® Xe Graphics` |

## Install AOT-enabled IPEX (Windows only, CPython 3.11)

Expand Down
