Skip to content

Commit

Permalink
--device autoでのGPU自動選択の高度化。 ( #225 )
Browse files Browse the repository at this point in the history
共有メモリ上にGPUの使用状況を登録して参照することで精度を向上させる。
  • Loading branch information
rigaya committed Nov 26, 2024
1 parent ed65d56 commit 9c6e5ed
Show file tree
Hide file tree
Showing 13 changed files with 456 additions and 8 deletions.
7 changes: 6 additions & 1 deletion QSVEncC/QSVEncC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,12 @@ int run(int argc, TCHAR *argv[]) {
_tsetlocale(LC_ALL, _T("Japanese"));
}
#endif //#if defined(_WIN32) || defined(_WIN64)

if (Params.ctrl.processMonitorDevUsageReset) {
return processMonitorRGYDeviceResetEntry();
}
if (Params.ctrl.processMonitorDevUsage) {
return processMonitorRGYDeviceUsage(Params.ctrl.parentProcessID, (int)Params.device);
}
if (Params.bBenchmark) {
return run_benchmark(&Params);
}
Expand Down
2 changes: 2 additions & 0 deletions QSVPipeline/QSVPipeline.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<ClCompile Include="rgy_cmd.cpp" />
<ClCompile Include="rgy_codepage.cpp" />
<ClCompile Include="rgy_def.cpp" />
<ClCompile Include="rgy_device_usage.cpp" />
<ClCompile Include="rgy_device_vulkan.cpp" />
<ClCompile Include="rgy_dummy_load.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
Expand Down Expand Up @@ -1033,6 +1034,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<ClInclude Include="rgy_cmd.h" />
<ClInclude Include="rgy_codepage.h" />
<ClInclude Include="rgy_def.h" />
<ClInclude Include="rgy_device_usage.h" />
<ClInclude Include="rgy_device_vulkan.h" />
<ClInclude Include="rgy_dummy_load.h" />
<ClInclude Include="rgy_env.h" />
Expand Down
6 changes: 6 additions & 0 deletions QSVPipeline/QSVPipeline.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@
<ClCompile Include="rgy_libplacebo.cpp">
<Filter>ソース ファイル</Filter>
</ClCompile>
<ClCompile Include="rgy_device_usage.cpp">
<Filter>ソース ファイル</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="qsv_prm.h">
Expand Down Expand Up @@ -716,6 +719,9 @@
<ClInclude Include="rgy_device_vulkan.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
<ClInclude Include="rgy_device_usage.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="rgy_filter.cl">
Expand Down
27 changes: 23 additions & 4 deletions QSVPipeline/qsv_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3377,6 +3377,18 @@ RGY_ERR CQSVPipeline::deviceAutoSelect(const sInputParams *prm, std::vector<std:
if (gpuList.size() <= 1) {
return RGY_ERR_NONE;
}
int maxDeviceUsageCount = 1;
std::vector<std::pair<int, int64_t>> deviceUsage;
if (gpuList.size() > 1) {
RGYDeviceUsage devUsage;
deviceUsage = devUsage.getUsage();
for (size_t i = 0; i < deviceUsage.size(); i++) {
maxDeviceUsageCount = std::max(maxDeviceUsageCount, deviceUsage[i].first);
if (deviceUsage[i].first > 0) {
PrintMes(RGY_LOG_DEBUG, _T("Device #%d: %d usage.\n"), i, deviceUsage[i].first);
}
}
}
#if ENABLE_PERF_COUNTER
PrintMes(RGY_LOG_DEBUG, _T("Auto select device from %d devices.\n"), (int)gpuList.size());
bool counterIsIntialized = m_pPerfMonitor->isPerfCounterInitialized();
Expand All @@ -3403,15 +3415,17 @@ RGY_ERR CQSVPipeline::deviceAutoSelect(const sInputParams *prm, std::vector<std:
RGYGPUCounterWinEntries(counters).filter_type(L"compute").max()), //vce-opencl
RGYGPUCounterWinEntries(counters).filter_type(L"3d").max()), //qsv
RGYGPUCounterWinEntries(counters).filter_type(L"videoprocessing").max());
const int deviceUsageCount = (int)gpu->deviceNum() < (int)deviceUsage.size() ? deviceUsage[(int)gpu->deviceNum()].first : 0;
double usage_score = 100.0 * (maxDeviceUsageCount - deviceUsageCount) / (double)maxDeviceUsageCount;
double core_score = 0.0;
double cc_score = 0.0;
double ve_score = 100.0 * (1.0 - std::pow(ve_utilization / 100.0, 1.0)) * prm->ctrl.gpuSelect.ve;
double gpu_score = 100.0 * (1.0 - std::pow(gpu_utilization / 100.0, 1.5)) * prm->ctrl.gpuSelect.gpu;
double cl_score = gpu->devInfo() ? 0.0 : -100.0; // openclの初期化に成功したか?
double cl_score = gpu->devInfo() ? 0.0 : maxDeviceUsageCount * -100.0; // openclの初期化に成功したか?

gpuscore[gpu->deviceNum()] = cc_score + ve_score + gpu_score + core_score + cl_score;
PrintMes(RGY_LOG_DEBUG, _T("GPU #%d (%s) score: %.1f: VE %.1f, GPU %.1f, CC %.1f, Core %.1f, CL %.1f.\n"), gpu->deviceNum(), gpu->name().c_str(),
gpuscore[gpu->deviceNum()], ve_score, gpu_score, cc_score, core_score, cl_score);
gpuscore[gpu->deviceNum()] = usage_score + cc_score + ve_score + gpu_score + core_score + cl_score;
PrintMes(RGY_LOG_DEBUG, _T("GPU #%d (%s) score: %.1f: Use: %.1f, VE %.1f, GPU %.1f, CC %.1f, Core %.1f, CL %.1f.\n"), gpu->deviceNum(), gpu->name().c_str(),
gpuscore[gpu->deviceNum()], usage_score, ve_score, gpu_score, cc_score, core_score, cl_score);
}
std::sort(gpuList.begin(), gpuList.end(), [&](const std::unique_ptr<QSVDevice> &a, const std::unique_ptr<QSVDevice> &b) {
if (gpuscore.at(a->deviceNum()) != gpuscore.at(b->deviceNum())) {
Expand All @@ -3430,6 +3444,7 @@ RGY_ERR CQSVPipeline::deviceAutoSelect(const sInputParams *prm, std::vector<std:

RGY_ERR CQSVPipeline::InitSession(const sInputParams *inputParam, std::vector<std::unique_ptr<QSVDevice>>& deviceList) {
auto err = RGY_ERR_NONE;
const int deviceCount = (int)deviceList.size();
if (deviceList.size() == 0) {
PrintMes(RGY_LOG_DEBUG, _T("No device found for QSV encoding!\n"));
return RGY_ERR_DEVICE_NOT_FOUND;
Expand All @@ -3445,6 +3460,10 @@ RGY_ERR CQSVPipeline::InitSession(const sInputParams *inputParam, std::vector<st
m_device = std::move(deviceList.front());
PrintMes(RGY_LOG_DEBUG, _T("InitSession: selected device #%d: %s.\n"), (int)m_device->deviceNum(), m_device->name().c_str());
}
if (deviceCount > 1) {
RGYDeviceUsage devUsage;
devUsage.startProcessMonitor((int)m_device->deviceNum());
}

//使用できる最大のversionをチェック
m_device->mfxSession().QueryVersion(&m_mfxVer);
Expand Down
1 change: 1 addition & 0 deletions QSVPipeline/qsv_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "rgy_output.h"
#include "rgy_opencl.h"
#include "rgy_dummy_load.h"
#include "rgy_device_usage.h"
#include "qsv_vpp_mfx.h"
#include "qsv_mfx_dec.h"
#include "qsv_pipeline_ctrl.h"
Expand Down
14 changes: 13 additions & 1 deletion QSVPipeline/rgy_cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6988,6 +6988,14 @@ int parse_one_ctrl_option(const TCHAR *option_name, const TCHAR *strInput[], int
ctrl->enableVulkan = true;
return 0;
}
if (IS_OPTION("process-monitor-dev-usage")) {
ctrl->processMonitorDevUsage = true;
return 0;
}
if (IS_OPTION("process-monitor-dev-usage-reset")) {
ctrl->processMonitorDevUsageReset = true;
return 0;
}
return -10;
}

Expand Down Expand Up @@ -8357,7 +8365,9 @@ tstring gen_cmd(const RGYParamControl *param, const RGYParamControl *defaultPrm,
}
}
OPT_NUM(_T("--perf-monitor-interval"), perfMonitorInterval);
OPT_NUM(_T("--parent-pid"), parentProcessID);
if (param->parentProcessID != defaultPrm->parentProcessID) {
cmd << strsprintf(_T(" --parent-pid %x"), param->parentProcessID);
}
if (param->gpuSelect != defaultPrm->gpuSelect) {
std::basic_stringstream<TCHAR> tmp;
tmp.str(tstring());
Expand All @@ -8373,6 +8383,8 @@ tstring gen_cmd(const RGYParamControl *param, const RGYParamControl *defaultPrm,
OPT_BOOL(_T("--enable-opencl"), _T("--disable-opencl"), enableOpenCL);
#endif
OPT_BOOL(_T("--enable-vulkan"), _T("--disable-vulkan"), enableVulkan);
OPT_BOOL(_T("--process-monitor-dev-usage"), _T(""), processMonitorDevUsage);
OPT_BOOL(_T("--process-monitor-dev-usage-reset"), _T(""), processMonitorDevUsageReset);
return cmd.str();
}

Expand Down
Loading

0 comments on commit 9c6e5ed

Please sign in to comment.