Skip to content

Commit

Permalink
Output loudness normalization (#18)
Browse files Browse the repository at this point in the history
* Implement loudness

* Apply normalization if active

* Fix JSON find

* Adjust target loudness
  • Loading branch information
sdatkinson authored Apr 2, 2023
1 parent f57582e commit c0d0c2c
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 17 deletions.
33 changes: 24 additions & 9 deletions NAM/dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#define tanh_impl_ std::tanh
// #define tanh_impl_ fast_tanh_

constexpr auto _INPUT_BUFFER_SAFETY_FACTOR = 32;
constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;

DSP::DSP() { this->_stale_params = true; }
DSP::DSP() : mLoudness(TARGET_DSP_LOUDNESS), mNormalizeOutputLoudness(false), _stale_params(true) {}

DSP::DSP(const double loudness) : mLoudness(loudness), mNormalizeOutputLoudness(false), _stale_params(true) {}

void DSP::process(double **inputs, double **outputs,
const int num_channels, const int num_frames,
Expand Down Expand Up @@ -71,14 +73,18 @@ void DSP::_process_core_() {

void DSP::_apply_output_level_(double **outputs, const int num_channels,
const int num_frames, const double gain) {
for (int c = 0; c < num_channels; c++)
const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain;
for (int c = 0; c < num_channels; c++)
for (int s = 0; s < num_frames; s++)
outputs[c][s] = double(gain * this->_core_dsp_output[s]);
outputs[c][s] = double(finalGain * this->_core_dsp_output[s]);
}

// Buffer =====================================================================

Buffer::Buffer(const int receptive_field) : DSP() {
Buffer::Buffer(const int receptive_field) : Buffer(TARGET_DSP_LOUDNESS, receptive_field) {}

Buffer::Buffer(const double loudness, const int receptive_field) : DSP(loudness) {
this->_set_receptive_field(receptive_field);
}

Expand Down Expand Up @@ -146,8 +152,13 @@ void Buffer::finalize_(const int num_frames) {
// Linear =====================================================================

Linear::Linear(const int receptive_field, const bool _bias,
const std::vector<float> &params)
: Buffer(receptive_field) {
const std::vector<float>& params) : Linear(TARGET_DSP_LOUDNESS, receptive_field, _bias, params)
{}

Linear::Linear(const double loudness, const int receptive_field, const bool _bias,
const std::vector<float>& params)
: Buffer(loudness, receptive_field)
{
if (params.size() != (receptive_field + (_bias ? 1 : 0)))
throw std::runtime_error("Params vector does not match expected size based "
"on architecture parameters");
Expand Down Expand Up @@ -426,10 +437,14 @@ void convnet::_Head::process_(const Eigen::MatrixXf &input,
output(i) = this->_bias + input.col(j).dot(this->_weight);
}

convnet::ConvNet::ConvNet(const int channels, const std::vector<int> &dilations,
convnet::ConvNet::ConvNet(const int channels, const std::vector<int>& dilations,
const bool batchnorm, const std::string activation,
std::vector<float>& params) : ConvNet(TARGET_DSP_LOUDNESS, channels, dilations, batchnorm, activation, params) {}

convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::vector<int> &dilations,
const bool batchnorm, const std::string activation,
std::vector<float> &params)
: Buffer(*std::max_element(dilations.begin(), dilations.end())) {
: Buffer(loudness, *std::max_element(dilations.begin(), dilations.end())) {
this->_verify_params(channels, dilations, batchnorm, params.size());
this->_blocks.resize(dilations.size());
std::vector<float>::iterator it = params.begin();
Expand Down
17 changes: 16 additions & 1 deletion NAM/dsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,13 @@ class DSPParam {
};
// And the params shall be provided as a std::vector<DSPParam>.

// How loud do we want the models to be? in dB
#define TARGET_DSP_LOUDNESS -18.0

class DSP {
public:
DSP();
DSP();
DSP(const double loudness);
// process() does all of the processing required to take `inputs` array and
// fill in the required values on `outputs`.
// To do this:
Expand All @@ -51,8 +55,13 @@ class DSP {
// that actually uses them, which varies depending on the particulars of the
// DSP subclass implementation.
virtual void finalize_(const int num_frames);
void SetNormalize(const bool normalize) { this->mNormalizeOutputLoudness = normalize; };

protected:
// How loud is the model?
double mLoudness;
// Should we normalize according to this loudness?
bool mNormalizeOutputLoudness;
// Parameters (aka "knobs")
std::unordered_map<std::string, double> _params;
// If the params have changed since the last buffer was processed:
Expand Down Expand Up @@ -94,6 +103,7 @@ class DSP {
class Buffer : public DSP {
public:
Buffer(const int receptive_field);
Buffer(const double loudness, const int receptive_field);
void finalize_(const int num_frames);

protected:
Expand All @@ -119,6 +129,8 @@ class Linear : public Buffer {
public:
Linear(const int receptive_field, const bool _bias,
const std::vector<float> &params);
Linear(const double loudness, const int receptive_field, const bool _bias,
const std::vector<float>& params);
void _process_core_() override;

protected:
Expand Down Expand Up @@ -270,6 +282,9 @@ class ConvNet : public Buffer {
ConvNet(const int channels, const std::vector<int> &dilations,
const bool batchnorm, const std::string activation,
std::vector<float> &params);
ConvNet(const double loudness, const int channels, const std::vector<int>& dilations,
const bool batchnorm, const std::string activation,
std::vector<float>& params);

protected:
std::vector<ConvNetBlock> _blocks;
Expand Down
19 changes: 14 additions & 5 deletions NAM/get_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,32 +46,41 @@ std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
auto architecture = j["architecture"];
nlohmann::json config = j["config"];
std::vector<float> params = _get_weights(j, config_filename);
bool haveLoudness = false;
double loudness = TARGET_DSP_LOUDNESS;
if (j.find("metadata") != j.end()) {
if (j["metadata"].find("loudness") != j["metadata"].end()) {
loudness = j["metadata"]["loudness"];
haveLoudness = true;
}
}


if (architecture == "Linear") {
const int receptive_field = config["receptive_field"];
const bool _bias = config["bias"];
return std::make_unique<Linear>(receptive_field, _bias, params);
return std::make_unique<Linear>(loudness, receptive_field, _bias, params);
} else if (architecture == "ConvNet") {
const int channels = config["channels"];
const bool batchnorm = config["batchnorm"];
std::vector<int> dilations;
for (int i = 0; i < config["dilations"].size(); i++)
dilations.push_back(config["dilations"][i]);
const std::string activation = config["activation"];
return std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm,
return std::make_unique<convnet::ConvNet>(loudness, channels, dilations, batchnorm,
activation, params);
} else if (architecture == "LSTM") {
const int num_layers = config["num_layers"];
const int input_size = config["input_size"];
const int hidden_size = config["hidden_size"];
auto json = nlohmann::json{};
return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
return std::make_unique<lstm::LSTM>(loudness, num_layers, input_size, hidden_size,
params, json);
} else if (architecture == "CatLSTM") {
const int num_layers = config["num_layers"];
const int input_size = config["input_size"];
const int hidden_size = config["hidden_size"];
return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
return std::make_unique<lstm::LSTM>(loudness, num_layers, input_size, hidden_size,
params, config["parametric"]);
} else if (architecture == "WaveNet" || architecture == "CatWaveNet") {
std::vector<wavenet::LayerArrayParams> layer_array_params;
Expand All @@ -94,7 +103,7 @@ std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
auto parametric_json =
architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
return std::make_unique<wavenet::WaveNet>(
layer_array_params, head_scale, with_head, parametric_json, params);
loudness, layer_array_params, head_scale, with_head, parametric_json, params);
} else {
throw std::runtime_error("Unrecognized architecture");
}
Expand Down
6 changes: 5 additions & 1 deletion NAM/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ void lstm::LSTMCell::process_(const Eigen::VectorXf &x) {
}

lstm::LSTM::LSTM(const int num_layers, const int input_size,
const int hidden_size, std::vector<float>& params,
nlohmann::json& parametric) : LSTM(TARGET_DSP_LOUDNESS, num_layers, input_size, hidden_size, params, parametric) {}

lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_size,
const int hidden_size, std::vector<float> &params,
nlohmann::json &parametric) {
nlohmann::json &parametric) : DSP(loudness) {
this->_init_parametric(parametric);
std::vector<float>::iterator it = params.begin();
for (int i = 0; i < num_layers; i++)
Expand Down
2 changes: 2 additions & 0 deletions NAM/lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class LSTM : public DSP {
public:
LSTM(const int num_layers, const int input_size, const int hidden_size,
std::vector<float> &params, nlohmann::json &parametric);
LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size,
std::vector<float>& params, nlohmann::json& parametric);

protected:
Eigen::VectorXf _head_weight;
Expand Down
8 changes: 7 additions & 1 deletion NAM/wavenet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,16 @@ void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) {
// WaveNet ====================================================================

wavenet::WaveNet::WaveNet(
const std::vector<wavenet::LayerArrayParams>& layer_array_params,
const float head_scale, const bool with_head, nlohmann::json parametric,
std::vector<float> params) : WaveNet(TARGET_DSP_LOUDNESS, layer_array_params, head_scale, with_head, parametric, params) {}

wavenet::WaveNet::WaveNet(
const double loudness,
const std::vector<wavenet::LayerArrayParams> &layer_array_params,
const float head_scale, const bool with_head, nlohmann::json parametric,
std::vector<float> params)
: //_head(channels, head_layers, head_channels, head_activation),
: DSP(loudness),
_num_frames(0), _head_scale(head_scale) {
if (with_head)
throw std::runtime_error("Head not implemented!");
Expand Down
3 changes: 3 additions & 0 deletions NAM/wavenet.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ class WaveNet : public DSP {
WaveNet(const std::vector<LayerArrayParams> &layer_array_params,
const float head_scale, const bool with_head,
nlohmann::json parametric, std::vector<float> params);
WaveNet(const double loudness, const std::vector<LayerArrayParams>& layer_array_params,
const float head_scale, const bool with_head,
nlohmann::json parametric, std::vector<float> params);

// WaveNet(WaveNet&&) = default;
// WaveNet& operator=(WaveNet&&) = default;
Expand Down

0 comments on commit c0d0c2c

Please sign in to comment.