Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update X86 feature and CPU detection #36502

Merged
merged 5 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 49 additions & 27 deletions src/features_x86.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#ifdef _CPU_X86_
// avx is unusable on 32bit before LLVM 5.0 due to LLVM bug (try to encode too many registers)
#define JL_X86_AVX_MIN_VER 50000
#define JL_X86_64ONLY_VER(x) UINT32_MAX
#else
#define JL_X86_AVX_MIN_VER 0
#define JL_X86_64ONLY_VER(x) x
#endif

Expand All @@ -14,16 +11,16 @@
JL_FEATURE_DEF(sse3, 0, 0)
JL_FEATURE_DEF(pclmul, 1, 0)
JL_FEATURE_DEF(ssse3, 9, 0)
JL_FEATURE_DEF(fma, 12, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(fma, 12, 0)
JL_FEATURE_DEF(cx16, 13, JL_X86_64ONLY_VER(0)) // cx16 requires 64bit
JL_FEATURE_DEF_NAME(sse41, 19, 0, "sse4.1")
JL_FEATURE_DEF_NAME(sse42, 20, 0, "sse4.2")
JL_FEATURE_DEF(movbe, 22, 0)
JL_FEATURE_DEF(popcnt, 23, 0)
JL_FEATURE_DEF(aes, 25, 0)
JL_FEATURE_DEF(xsave, 26, 0)
JL_FEATURE_DEF(avx, 28, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(f16c, 29, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(avx, 28, 0)
JL_FEATURE_DEF(f16c, 29, 0)
JL_FEATURE_DEF(rdrnd, 30, 0)

// EAX=1: EDX
Expand All @@ -34,47 +31,66 @@ JL_FEATURE_DEF(fsgsbase, 32 * 2 + 0, 0)
// JL_FEATURE_DEF(sgx, 32 * 2 + 2, 0) // Disable for now since it's very hard to detect
JL_FEATURE_DEF(bmi, 32 * 2 + 3, 0)
// JL_FEATURE_DEF(hle, 32 * 2 + 4, 0) // Not used and gone in LLVM 5.0
JL_FEATURE_DEF(avx2, 32 * 2 + 5, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(avx2, 32 * 2 + 5, 0)
JL_FEATURE_DEF(bmi2, 32 * 2 + 8, 0)
// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Not used and gone in LLVM 5.0
// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Priviledged instruction
JL_FEATURE_DEF(rtm, 32 * 2 + 11, 0)
JL_FEATURE_DEF(mpx, 32 * 2 + 14, 0)
// Disable avx512 pre-5.0 since it can't handle address space
JL_FEATURE_DEF(avx512f, 32 * 2 + 16, 50000)
JL_FEATURE_DEF(avx512dq, 32 * 2 + 17, 50000)
// JL_FEATURE_DEF(mpx, 32 * 2 + 14, 0) // Deprecated in LLVM 10.0
JL_FEATURE_DEF(avx512f, 32 * 2 + 16, 0)
JL_FEATURE_DEF(avx512dq, 32 * 2 + 17, 0)
JL_FEATURE_DEF(rdseed, 32 * 2 + 18, 0)
JL_FEATURE_DEF(adx, 32 * 2 + 19, 0)
// JL_FEATURE_DEF(smap, 32 * 2 + 20, 0) // Not used and gone in LLVM 5.0
JL_FEATURE_DEF(avx512ifma, 32 * 2 + 21, 50000)
JL_FEATURE_DEF(avx512ifma, 32 * 2 + 21, 0)
// JL_FEATURE_DEF(pcommit, 32 * 2 + 22, 0) // Deprecated
JL_FEATURE_DEF(clflushopt, 32 * 2 + 23, 0)
JL_FEATURE_DEF(clwb, 32 * 2 + 24, 0)
JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 50000)
JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 50000)
JL_FEATURE_DEF(avx512cd, 32 * 2 + 28, 50000)
JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 0)
JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 0)
JL_FEATURE_DEF(avx512cd, 32 * 2 + 28, 0)
JL_FEATURE_DEF(sha, 32 * 2 + 29, 0)
JL_FEATURE_DEF(avx512bw, 32 * 2 + 30, 50000)
JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 50000)
JL_FEATURE_DEF(avx512bw, 32 * 2 + 30, 0)
JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 0)

// EAX=7,ECX=0: ECX
JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0)
JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 50000)
JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 0)
JL_FEATURE_DEF(pku, 32 * 3 + 4, 0) // ospke
JL_FEATURE_DEF(avx512vpopcntdq, 32 * 3 + 14, 50000)
JL_FEATURE_DEF(waitpkg, 32 * 3 + 5, 0)
JL_FEATURE_DEF(avx512vbmi2, 32 * 3 + 6, 0)
JL_FEATURE_DEF(shstk, 32 * 3 + 7, 0)
JL_FEATURE_DEF(gfni, 32 * 3 + 8, 0)
JL_FEATURE_DEF(vaes, 32 * 3 + 9, 0)
JL_FEATURE_DEF(vpclmulqdq, 32 * 3 + 10, 0)
JL_FEATURE_DEF(avx512vnni, 32 * 3 + 11, 0)
JL_FEATURE_DEF(avx512bitalg, 32 * 3 + 12, 0)
JL_FEATURE_DEF(avx512vpopcntdq, 32 * 3 + 14, 0)
JL_FEATURE_DEF(rdpid, 32 * 3 + 22, 0)
JL_FEATURE_DEF(cldemote, 32 * 3 + 25, 0)
JL_FEATURE_DEF(movdiri, 32 * 3 + 27, 0)
JL_FEATURE_DEF(movdir64b, 32 * 3 + 28, 0)
JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 90000)

// EAX=7,ECX=0: EDX
// JL_FEATURE_DEF(avx512_4vnniw, 32 * 4 + 2, ?????)
// JL_FEATURE_DEF(avx512_4fmaps, 32 * 4 + 3, ?????)
// JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
// JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 90000)
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
JL_FEATURE_DEF_NAME(amx_int8, 32 * 4 + 25, 110000, "amx-int8")

// EAX=0x80000001: ECX
// ignore sahf on 32bit x86 since it is required
JL_FEATURE_DEF(sahf, 32 * 5 + 0, JL_X86_64ONLY_VER(0))
JL_FEATURE_DEF(lzcnt, 32 * 5 + 5, 0)
JL_FEATURE_DEF(sse4a, 32 * 5 + 6, 0)
JL_FEATURE_DEF(prfchw, 32 * 5 + 8, 0)
JL_FEATURE_DEF(xop, 32 * 5 + 11, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(lwp, 32 * 5 + 15, 50000)
JL_FEATURE_DEF(fma4, 32 * 5 + 16, JL_X86_AVX_MIN_VER)
JL_FEATURE_DEF(xop, 32 * 5 + 11, 0)
JL_FEATURE_DEF(lwp, 32 * 5 + 15, 0)
JL_FEATURE_DEF(fma4, 32 * 5 + 16, 0)
JL_FEATURE_DEF(tbm, 32 * 5 + 21, 0)
JL_FEATURE_DEF(mwaitx, 32 * 5 + 29, 0)

Expand All @@ -88,7 +104,13 @@ JL_FEATURE_DEF(xsavec, 32 * 7 + 1, 0)
JL_FEATURE_DEF(xsaves, 32 * 7 + 3, 0)

// EAX=0x80000008: EBX
JL_FEATURE_DEF(clzero, 32 * 8 + 0, 50000)
JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)

// EAX=7,ECX=1: EAX
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 90000)

// EAX=0x14,ECX=0: EBX
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)

#undef JL_X86_AVX_MIN_VER
#undef JL_X86_64ONLY_VER
Loading