Skip to content

Commit

Permalink
w
Browse files (browse the repository at this point in the history)
  • Loading branch information
nihui committed Jan 23, 2025
1 parent b950fac commit 781a536
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 66 deletions.
44 changes: 10 additions & 34 deletions src/layer/arm/dequantize_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,29 +46,17 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con

// NCNN_LOGE("dequantize %d %d %d %d", scale_data_size, bias_data_size, elemcount, elempack);

const float* scale_ptr = scale_data;

float scale = 0.f;
#if __ARM_NEON
float32x4_t _scale = vdupq_n_f32();
#endif // __ARM_NEON

if (scale_data_size == 1 || elempack == 1)
{
scale = scale_ptr[0];
float scale = scale_data[0];
#if __ARM_NEON
_scale = vdupq_n_f32(scale);
#endif // __ARM_NEON
}
else
float32x4_t _scale = vdupq_n_f32(scale);
if (scale_data_size > 1)
{
#if __ARM_NEON
if (elempack == 4)
{
_scale = vld1q_f32(scale_ptr);
_scale = vld1q_f32((const float*)scale_data);
}
#endif // __ARM_NEON
}
#endif // __ARM_NEON

if (bias_data_size == 0)
{
Expand All @@ -92,29 +80,17 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
}
else
{
const float* bias_ptr = bias_data;

float bias = 0.f;
#if __ARM_NEON
float32x4_t _bias = vdupq_n_f32();
#endif // __ARM_NEON

if (bias_data_size == 1 || elempack == 1)
{
bias = bias_ptr[0];
float bias = bias_data[0];
#if __ARM_NEON
_bias = vdupq_n_f32(bias);
#endif // __ARM_NEON
}
else
float32x4_t _bias = vdupq_n_f32(bias);
if (bias_data_size > 1)
{
#if __ARM_NEON
if (elempack == 4)
{
_bias = vld1q_f32(bias_ptr);
_bias = vld1q_f32((const float*)bias_data);
}
#endif // __ARM_NEON
}
#endif // __ARM_NEON

int i = 0;
#if __ARM_NEON
Expand Down
46 changes: 14 additions & 32 deletions src/layer/arm/dequantize_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,19 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da

// NCNN_LOGE("dequantize_fp16s %d %d %d %d", scale_data_size, bias_data_size, elemcount, elempack);

const float* scale_ptr = scale_data;

float scale = 0.f;
float32x4_t _scale0 = vdupq_n_f32();
float32x4_t _scale1 = vdupq_n_f32();

if (scale_data_size == 1 || elempack == 1)
{
scale = scale_ptr[0];
_scale0 = vdupq_n_f32(scale);
_scale1 = _scale0;
}
else
float scale = scale_data[0];
float32x4_t _scale0 = vdupq_n_f32(scale);
float32x4_t _scale1 = _scale0;
if (scale_data_size > 1)
{
if (elempack == 8)
{
_scale0 = vld1q_f32(scale_ptr);
_scale1 = vld1q_f32(scale_ptr + 4);
_scale0 = vld1q_f32((const float*)scale_data);
_scale1 = vld1q_f32((const float*)scale_data + 4);
}
if (elempack == 4)
{
_scale0 = vld1q_f32(scale_ptr);
_scale0 = vld1q_f32((const float*)scale_data);
_scale1 = _scale0;
}
}
Expand Down Expand Up @@ -86,28 +77,19 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
}
else
{
const float* bias_ptr = bias_data;

float bias = 0.f;
float32x4_t _bias0 = vdupq_n_f32();
float32x4_t _bias1 = vdupq_n_f32();

if (bias_data_size == 1 || elempack == 1)
{
bias = bias_ptr[0];
_bias0 = vdupq_n_f32(bias);
_bias1 = _bias0;
}
else
float bias = bias_data[0];
float32x4_t _bias0 = vdupq_n_f32(bias);
float32x4_t _bias1 = _bias0;
if (bias_data_size > 1)
{
if (elempack == 8)
{
_bias0 = vld1q_f32(bias_ptr);
_bias1 = vld1q_f32(bias_ptr + 4);
_bias0 = vld1q_f32((const float*)bias_data);
_bias1 = vld1q_f32((const float*)bias_data + 4);
}
if (elempack == 4)
{
_bias0 = vld1q_f32(bias_ptr);
_bias0 = vld1q_f32((const float*)bias_data);
_bias1 = _bias0;
}
}
Expand Down

0 comments on commit 781a536

Please sign in to comment.