Output loudness normalization #18

Merged: 5 commits, Apr 2, 2023
Changes from all commits
33 changes: 24 additions & 9 deletions NAM/dsp.cpp
@@ -15,9 +15,11 @@
 #define tanh_impl_ std::tanh
 // #define tanh_impl_ fast_tanh_
 
-constexpr auto _INPUT_BUFFER_SAFETY_FACTOR = 32;
+constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;
 
-DSP::DSP() { this->_stale_params = true; }
+DSP::DSP() : mLoudness(TARGET_DSP_LOUDNESS), mNormalizeOutputLoudness(false), _stale_params(true) {}
+
+DSP::DSP(const double loudness) : mLoudness(loudness), mNormalizeOutputLoudness(false), _stale_params(true) {}
 
 void DSP::process(double **inputs, double **outputs,
                   const int num_channels, const int num_frames,
@@ -71,14 +73,18 @@ void DSP::_process_core_() {
 
 void DSP::_apply_output_level_(double **outputs, const int num_channels,
                                const int num_frames, const double gain) {
-  for (int c = 0; c < num_channels; c++)
+  const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
+  const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain;
+  for (int c = 0; c < num_channels; c++)
     for (int s = 0; s < num_frames; s++)
-      outputs[c][s] = double(gain * this->_core_dsp_output[s]);
+      outputs[c][s] = double(finalGain * this->_core_dsp_output[s]);
 }
 
 // Buffer =====================================================================
 
-Buffer::Buffer(const int receptive_field) : DSP() {
+Buffer::Buffer(const int receptive_field) : Buffer(TARGET_DSP_LOUDNESS, receptive_field) {}
+
+Buffer::Buffer(const double loudness, const int receptive_field) : DSP(loudness) {
   this->_set_receptive_field(receptive_field);
 }
 
@@ -146,8 +152,13 @@ void Buffer::finalize_(const int num_frames) {
 // Linear =====================================================================
 
 Linear::Linear(const int receptive_field, const bool _bias,
-               const std::vector<float> &params)
-    : Buffer(receptive_field) {
+               const std::vector<float>& params) : Linear(TARGET_DSP_LOUDNESS, receptive_field, _bias, params)
+{}
+
+Linear::Linear(const double loudness, const int receptive_field, const bool _bias,
+               const std::vector<float>& params)
+    : Buffer(loudness, receptive_field)
+{
   if (params.size() != (receptive_field + (_bias ? 1 : 0)))
     throw std::runtime_error("Params vector does not match expected size based "
                              "on architecture parameters");
@@ -426,10 +437,14 @@ void convnet::_Head::process_(const Eigen::MatrixXf &input,
     output(i) = this->_bias + input.col(j).dot(this->_weight);
 }
 
-convnet::ConvNet::ConvNet(const int channels, const std::vector<int> &dilations,
+convnet::ConvNet::ConvNet(const int channels, const std::vector<int>& dilations,
                           const bool batchnorm, const std::string activation,
+                          std::vector<float>& params) : ConvNet(TARGET_DSP_LOUDNESS, channels, dilations, batchnorm, activation, params) {}
+
+convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::vector<int> &dilations,
+                          const bool batchnorm, const std::string activation,
                           std::vector<float> &params)
-    : Buffer(*std::max_element(dilations.begin(), dilations.end())) {
+    : Buffer(loudness, *std::max_element(dilations.begin(), dilations.end())) {
   this->_verify_params(channels, dilations, batchnorm, params.size());
   this->_blocks.resize(dilations.size());
   std::vector<float>::iterator it = params.begin();
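
For intuition about the new gain math in _apply_output_level_: the factor converts the gap between the model's reported loudness and the -18 dB target from decibels into a linear multiplier. Below is a standalone sketch of that arithmetic; the -12 dB measured loudness is illustrative, not from this PR.

#include <cmath>
#include <cstdio>

// Sketch of the normalization factor computed in DSP::_apply_output_level_.
int main() {
  const double target = -18.0;    // TARGET_DSP_LOUDNESS
  const double measured = -12.0;  // hypothetical metadata loudness, 6 dB hotter than target
  const double loudnessGain = std::pow(10.0, -(measured - target) / 20.0);
  std::printf("loudnessGain = %.3f\n", loudnessGain);  // ~0.501: the output is roughly halved
  return 0;
}

When the model's loudness equals the target (including the default-constructed case, where mLoudness is initialized to TARGET_DSP_LOUDNESS), the exponent is zero and the factor is exactly 1, so enabling normalization without loudness metadata is a no-op.
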
17 changes: 16 additions & 1 deletion NAM/dsp.h
@@ -28,9 +28,13 @@ class DSPParam {
 };
 // And the params shall be provided as a std::vector<DSPParam>.
 
+// How loud do we want the models to be? in dB
+#define TARGET_DSP_LOUDNESS -18.0
+
 class DSP {
 public:
-  DSP();
+  DSP();
+  DSP(const double loudness);
   // process() does all of the processing requried to take `inputs` array and
   // fill in the required values on `outputs`.
   // To do this:
@@ -51,8 +55,13 @@ class DSP {
   // that actually uses them, which varies depends on the particulars of the
   // DSP subclass implementation.
   virtual void finalize_(const int num_frames);
+  void SetNormalize(const bool normalize) { this->mNormalizeOutputLoudness = normalize; };
 
 protected:
+  // How loud is the model?
+  double mLoudness;
+  // Should we normalize according to this loudness?
+  bool mNormalizeOutputLoudness;
   // Parameters (aka "knobs")
   std::unordered_map<std::string, double> _params;
   // If the params have changed since the last buffer was processed:
@@ -94,6 +103,7 @@ class DSP {
 class Buffer : public DSP {
 public:
   Buffer(const int receptive_field);
+  Buffer(const double loudness, const int receptive_field);
   void finalize_(const int num_frames);
 
 protected:
@@ -119,6 +129,8 @@ class Linear : public Buffer {
 public:
   Linear(const int receptive_field, const bool _bias,
          const std::vector<float> &params);
+  Linear(const double loudness, const int receptive_field, const bool _bias,
+         const std::vector<float>& params);
   void _process_core_() override;
 
 protected:
@@ -270,6 +282,9 @@ class ConvNet : public Buffer {
   ConvNet(const int channels, const std::vector<int> &dilations,
           const bool batchnorm, const std::string activation,
           std::vector<float> &params);
+  ConvNet(const double loudness, const int channels, const std::vector<int>& dilations,
+          const bool batchnorm, const std::string activation,
+          std::vector<float>& params);
 
 protected:
   std::vector<ConvNetBlock> _blocks;
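
A minimal caller-side sketch of the new SetNormalize knob. This is a hypothetical host helper, assuming get_dsp() is visible from the includes below (the header that declares it is not shown in this diff); the audio processing call itself is left out since its full signature is elided here.

#include <filesystem>
#include <memory>
#include "NAM/dsp.h"

// Hypothetical host-side helper: load a model and opt in to output
// loudness normalization toward TARGET_DSP_LOUDNESS (-18 dB).
std::unique_ptr<DSP> loadNormalizedModel(const std::filesystem::path& modelFile) {
  std::unique_ptr<DSP> model = get_dsp(modelFile);  // picks up metadata loudness if present
  model->SetNormalize(true);
  return model;
}
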
19 changes: 14 additions & 5 deletions NAM/get_dsp.cpp
@@ -46,32 +46,41 @@ std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
   auto architecture = j["architecture"];
   nlohmann::json config = j["config"];
   std::vector<float> params = _get_weights(j, config_filename);
+  bool haveLoudness = false;
+  double loudness = TARGET_DSP_LOUDNESS;
+  if (j.find("metadata") != j.end()) {
+    if (j["metadata"].find("loudness") != j["metadata"].end()) {
+      loudness = j["metadata"]["loudness"];
+      haveLoudness = true;
+    }
+  }
+
 
   if (architecture == "Linear") {
     const int receptive_field = config["receptive_field"];
     const bool _bias = config["bias"];
-    return std::make_unique<Linear>(receptive_field, _bias, params);
+    return std::make_unique<Linear>(loudness, receptive_field, _bias, params);
   } else if (architecture == "ConvNet") {
     const int channels = config["channels"];
     const bool batchnorm = config["batchnorm"];
     std::vector<int> dilations;
     for (int i = 0; i < config["dilations"].size(); i++)
       dilations.push_back(config["dilations"][i]);
     const std::string activation = config["activation"];
-    return std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm,
+    return std::make_unique<convnet::ConvNet>(loudness, channels, dilations, batchnorm,
                                               activation, params);
   } else if (architecture == "LSTM") {
     const int num_layers = config["num_layers"];
     const int input_size = config["input_size"];
     const int hidden_size = config["hidden_size"];
     auto json = nlohmann::json{};
-    return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
+    return std::make_unique<lstm::LSTM>(loudness, num_layers, input_size, hidden_size,
                                         params, json);
   } else if (architecture == "CatLSTM") {
     const int num_layers = config["num_layers"];
     const int input_size = config["input_size"];
     const int hidden_size = config["hidden_size"];
-    return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
+    return std::make_unique<lstm::LSTM>(loudness, num_layers, input_size, hidden_size,
                                         params, config["parametric"]);
   } else if (architecture == "WaveNet" || architecture == "CatWaveNet") {
     std::vector<wavenet::LayerArrayParams> layer_array_params;
@@ -94,7 +103,7 @@ std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
     auto parametric_json =
         architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
     return std::make_unique<wavenet::WaveNet>(
-        layer_array_params, head_scale, with_head, parametric_json, params);
+        loudness, layer_array_params, head_scale, with_head, parametric_json, params);
   } else {
     throw std::runtime_error("Unrecognized architecture");
   }
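
The loader falls back to TARGET_DSP_LOUDNESS when the model file carries no loudness metadata. Below is a self-contained sketch of that lookup against an in-memory nlohmann::json object; the -13.5 dB value and the minimal set of keys are illustrative, not taken from the PR.

#include <nlohmann/json.hpp>
#include <iostream>

int main() {
  // Hypothetical model-file fragment; only the keys read for loudness are shown.
  nlohmann::json j = {
    {"architecture", "WaveNet"},
    {"metadata", {{"loudness", -13.5}}}
  };
  double loudness = -18.0;  // TARGET_DSP_LOUDNESS fallback
  if (j.find("metadata") != j.end() &&
      j["metadata"].find("loudness") != j["metadata"].end())
    loudness = j["metadata"]["loudness"];
  std::cout << "model loudness: " << loudness << " dB\n";  // prints -13.5
  return 0;
}
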
6 changes: 5 additions & 1 deletion NAM/lstm.cpp
@@ -51,8 +51,12 @@ void lstm::LSTMCell::process_(const Eigen::VectorXf &x) {
 }
 
 lstm::LSTM::LSTM(const int num_layers, const int input_size,
+                 const int hidden_size, std::vector<float>& params,
+                 nlohmann::json& parametric) : LSTM(TARGET_DSP_LOUDNESS, num_layers, input_size, hidden_size, params, parametric) {}
+
+lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_size,
                  const int hidden_size, std::vector<float> &params,
-                 nlohmann::json &parametric) {
+                 nlohmann::json &parametric) : DSP(loudness) {
   this->_init_parametric(parametric);
   std::vector<float>::iterator it = params.begin();
   for (int i = 0; i < num_layers; i++)
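
The same constructor-forwarding pattern appears in every class touched by this PR: the original signature is kept and simply delegates to a new overload that also takes the loudness, so existing call sites keep compiling. A generic sketch of the pattern (names illustrative, not from the PR):

// Old-style constructor stays source-compatible by delegating to the new one
// with the default target loudness.
struct Model {
  explicit Model(int receptiveField) : Model(-18.0, receptiveField) {}
  Model(double loudness, int receptiveField)
      : mLoudness(loudness), mReceptiveField(receptiveField) {}
  double mLoudness;
  int mReceptiveField;
};
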
2 changes: 2 additions & 0 deletions NAM/lstm.h
@@ -53,6 +53,8 @@ class LSTM : public DSP {
 public:
   LSTM(const int num_layers, const int input_size, const int hidden_size,
        std::vector<float> &params, nlohmann::json &parametric);
+  LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size,
+       std::vector<float>& params, nlohmann::json& parametric);
 
 protected:
   Eigen::VectorXf _head_weight;
8 changes: 7 additions & 1 deletion NAM/wavenet.cpp
@@ -227,10 +227,16 @@ void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) {
 // WaveNet ====================================================================
 
 wavenet::WaveNet::WaveNet(
+    const std::vector<wavenet::LayerArrayParams>& layer_array_params,
+    const float head_scale, const bool with_head, nlohmann::json parametric,
+    std::vector<float> params) : WaveNet(TARGET_DSP_LOUDNESS, layer_array_params, head_scale, with_head, parametric, params) {}
+
+wavenet::WaveNet::WaveNet(
+    const double loudness,
     const std::vector<wavenet::LayerArrayParams> &layer_array_params,
     const float head_scale, const bool with_head, nlohmann::json parametric,
     std::vector<float> params)
-    : //_head(channels, head_layers, head_channels, head_activation),
+    : DSP(loudness),
       _num_frames(0), _head_scale(head_scale) {
   if (with_head)
     throw std::runtime_error("Head not implemented!");
3 changes: 3 additions & 0 deletions NAM/wavenet.h
@@ -164,6 +164,9 @@ class WaveNet : public DSP {
   WaveNet(const std::vector<LayerArrayParams> &layer_array_params,
           const float head_scale, const bool with_head,
           nlohmann::json parametric, std::vector<float> params);
+  WaveNet(const double loudness, const std::vector<LayerArrayParams>& layer_array_params,
+          const float head_scale, const bool with_head,
+          nlohmann::json parametric, std::vector<float> params);
 
   // WaveNet(WaveNet&&) = default;
   // WaveNet& operator=(WaveNet&&) = default;