Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] Remove constructors with loudness #87

Merged
merged 2 commits into from
Oct 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions NAM/convnet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,6 @@ convnet::ConvNet::ConvNet(const int channels, const std::vector<int>& dilations,
throw std::runtime_error("Didn't touch all the params when initializing ConvNet");
}

convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::vector<int>& dilations,
const bool batchnorm, const std::string activation, std::vector<float>& params,
const double expected_sample_rate)
: ConvNet(channels, dilations, batchnorm, activation, params, expected_sample_rate)

{
SetLoudness(loudness);
}

void convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)

{
Expand Down
13 changes: 6 additions & 7 deletions NAM/convnet.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class BatchNorm
class ConvNetBlock
{
public:
ConvNetBlock() { this->_batchnorm = false; };
ConvNetBlock(){};
void set_params_(const int in_channels, const int out_channels, const int _dilation, const bool batchnorm,
const std::string activation, std::vector<float>::iterator& params);
void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const;
Expand All @@ -46,29 +46,28 @@ class ConvNetBlock

private:
BatchNorm batchnorm;
bool _batchnorm;
activations::Activation* activation;
bool _batchnorm = false;
activations::Activation* activation = nullptr;
};

class _Head
{
public:
_Head() { this->_bias = (float)0.0; };
_Head(){};
_Head(const int channels, std::vector<float>::iterator& params);
void process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, const long i_end) const;

private:
Eigen::VectorXf _weight;
float _bias;
float _bias = 0.0f;
};

class ConvNet : public Buffer
{
public:
ConvNet(const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
std::vector<float>& params, const double expected_sample_rate = -1.0);
ConvNet(const double loudness, const int channels, const std::vector<int>& dilations, const bool batchnorm,
const std::string activation, std::vector<float>& params, const double expected_sample_rate = -1.0);
~ConvNet() = default;

protected:
std::vector<ConvNetBlock> _blocks;
Expand Down
20 changes: 0 additions & 20 deletions NAM/dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ DSP::DSP(const double expected_sample_rate)
{
}

DSP::DSP(const double loudness, const double expected_sample_rate)
: mLoudness(loudness)
, mExpectedSampleRate(expected_sample_rate)
, _stale_params(true)
{
}

void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
// Default implementation is the null operation
Expand Down Expand Up @@ -75,12 +68,6 @@ Buffer::Buffer(const int receptive_field, const double expected_sample_rate)
this->_set_receptive_field(receptive_field);
}

Buffer::Buffer(const double loudness, const int receptive_field, const double expected_sample_rate)
: Buffer(receptive_field, expected_sample_rate)
{
SetLoudness(loudness);
}

void Buffer::_set_receptive_field(const int new_receptive_field)
{
this->_set_receptive_field(new_receptive_field, _INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field);
Expand Down Expand Up @@ -165,13 +152,6 @@ Linear::Linear(const int receptive_field, const bool _bias, const std::vector<fl
this->_bias = _bias ? params[receptive_field] : (float)0.0;
}

Linear::Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector<float>& params,
const double expected_sample_rate)
: Linear(receptive_field, _bias, params, expected_sample_rate)
{
SetLoudness(loudness);
}

void Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
{
this->Buffer::_update_buffers_(input, num_frames);
Expand Down
12 changes: 3 additions & 9 deletions NAM/dsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,10 @@ class DSPParam
class DSP
{
public:
// Two constructors are provided: one where we know how loud the model is, and one where we don't.
// Older models won't know, but newer ones will come with a loudness from the training based on their response to a
// standardized input.
// We may choose to have the models figure out for themselves how loud they are in here in the future.
DSP(const double expected_sample_rate);
// Initialization where we know how loud the model is.
DSP(const double loudness, const double expected_sample_rate);
virtual ~DSP() = default;
// process() does all of the processing required to take `input` array and
// fill in the required values on `output`.
Expand All @@ -75,9 +72,9 @@ class DSP
double GetLoudness() const;
// Get whether the model knows how loud it is.
bool HasLoudness() const { return mHasLoudness; };
// Option to set the loudness.
// This is included in the API so that downstream solutions can patch in the loudness of models that don't know how
// loud they are, but so one can also choose not to do so (e.g. if computational costs dictate).
// Set the loudness, in dB.
// This is usually defined to be the loudness of the model's response to a standardized input. The trainer has its
// own definition, but you can always use this to define it a different way if you like yours better.
void SetLoudness(const double loudness);

protected:
Expand Down Expand Up @@ -106,7 +103,6 @@ class Buffer : public DSP
{
public:
Buffer(const int receptive_field, const double expected_sample_rate = -1.0);
Buffer(const double loudness, const int receptive_field, const double expected_sample_rate = -1.0);
void finalize_(const int num_frames);

protected:
Expand All @@ -132,8 +128,6 @@ class Linear : public Buffer
public:
Linear(const int receptive_field, const bool _bias, const std::vector<float>& params,
const double expected_sample_rate = -1.0);
Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector<float>& params,
const double expected_sample_rate = -1.0);
void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;

protected:
Expand Down
33 changes: 14 additions & 19 deletions NAM/get_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,14 @@ std::unique_ptr<DSP> get_dsp(dspData& conf)
haveLoudness = true;
}
}
const double expected_sample_rate = conf.expected_sample_rate;
const double expectedSampleRate = conf.expected_sample_rate;

std::unique_ptr<DSP> out = nullptr;
if (architecture == "Linear")
{
const int receptive_field = config["receptive_field"];
const bool _bias = config["bias"];
return haveLoudness ? std::make_unique<Linear>(loudness, receptive_field, _bias, params, expected_sample_rate)
: std::make_unique<Linear>(receptive_field, _bias, params, expected_sample_rate);
out = std::make_unique<Linear>(receptive_field, _bias, params, expectedSampleRate);
}
else if (architecture == "ConvNet")
{
Expand All @@ -162,31 +162,23 @@ std::unique_ptr<DSP> get_dsp(dspData& conf)
for (size_t i = 0; i < config["dilations"].size(); i++)
dilations.push_back(config["dilations"][i]);
const std::string activation = config["activation"];
return haveLoudness ? std::make_unique<convnet::ConvNet>(
loudness, channels, dilations, batchnorm, activation, params, expected_sample_rate)
: std::make_unique<convnet::ConvNet>(
channels, dilations, batchnorm, activation, params, expected_sample_rate);
out = std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm, activation, params, expectedSampleRate);
}
else if (architecture == "LSTM")
{
const int num_layers = config["num_layers"];
const int input_size = config["input_size"];
const int hidden_size = config["hidden_size"];
auto empty_json = nlohmann::json{};
return haveLoudness ? std::make_unique<lstm::LSTM>(
loudness, num_layers, input_size, hidden_size, params, empty_json, expected_sample_rate)
: std::make_unique<lstm::LSTM>(
num_layers, input_size, hidden_size, params, empty_json, expected_sample_rate);
out = std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size, params, empty_json, expectedSampleRate);
}
else if (architecture == "CatLSTM")
{
const int num_layers = config["num_layers"];
const int input_size = config["input_size"];
const int hidden_size = config["hidden_size"];
return haveLoudness ? std::make_unique<lstm::LSTM>(
loudness, num_layers, input_size, hidden_size, params, config["parametric"], expected_sample_rate)
: std::make_unique<lstm::LSTM>(
num_layers, input_size, hidden_size, params, config["parametric"], expected_sample_rate);
out = std::make_unique<lstm::LSTM>(
num_layers, input_size, hidden_size, params, config["parametric"], expectedSampleRate);
}
else if (architecture == "WaveNet" || architecture == "CatWaveNet")
{
Expand All @@ -208,13 +200,16 @@ std::unique_ptr<DSP> get_dsp(dspData& conf)
// initialization of 'wavenet::WaveNet' Solution from
// https://stackoverflow.com/a/73956681/3768284
auto parametric_json = architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
return haveLoudness ? std::make_unique<wavenet::WaveNet>(
loudness, layer_array_params, head_scale, with_head, parametric_json, params, expected_sample_rate)
: std::make_unique<wavenet::WaveNet>(
layer_array_params, head_scale, with_head, parametric_json, params, expected_sample_rate);
out = std::make_unique<wavenet::WaveNet>(
layer_array_params, head_scale, with_head, parametric_json, params, expectedSampleRate);
}
else
{
throw std::runtime_error("Unrecognized architecture");
}
if (haveLoudness)
{
out->SetLoudness(loudness);
}
return out;
}
8 changes: 0 additions & 8 deletions NAM/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidden_si
assert(it == params.end());
}

lstm::LSTM::LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size,
std::vector<float>& params, nlohmann::json& parametric, const double expected_sample_rate)
: LSTM(num_layers, input_size, hidden_size, params, parametric, expected_sample_rate)

{
SetLoudness(loudness);
}

void lstm::LSTM::_init_parametric(nlohmann::json& parametric)
{
std::vector<std::string> parametric_names;
Expand Down
2 changes: 0 additions & 2 deletions NAM/lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ class LSTM : public DSP
public:
LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector<float>& params,
nlohmann::json& parametric, const double expected_sample_rate = -1.0);
LSTM(const double loudness, const int num_layers, const int input_size, const int hidden_size,
std::vector<float>& params, nlohmann::json& parametric, const double expected_sample_rate = -1.0);
~LSTM() = default;

protected:
Expand Down
8 changes: 0 additions & 8 deletions NAM/wavenet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,14 +279,6 @@ wavenet::WaveNet::WaveNet(const std::vector<wavenet::LayerArrayParams>& layer_ar
}
}

wavenet::WaveNet::WaveNet(const double loudness, const std::vector<wavenet::LayerArrayParams>& layer_array_params,
const float head_scale, const bool with_head, nlohmann::json parametric,
std::vector<float> params, const double expected_sample_rate)
: WaveNet(layer_array_params, head_scale, with_head, parametric, params, expected_sample_rate)
{
SetLoudness(loudness);
}

void wavenet::WaveNet::finalize_(const int num_frames)
{
this->DSP::finalize_(num_frames);
Expand Down
3 changes: 0 additions & 3 deletions NAM/wavenet.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,6 @@ class WaveNet : public DSP
public:
WaveNet(const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
nlohmann::json parametric, std::vector<float> params, const double expected_sample_rate = -1.0);
WaveNet(const double loudness, const std::vector<LayerArrayParams>& layer_array_params, const float head_scale,
const bool with_head, nlohmann::json parametric, std::vector<float> params,
const double expected_sample_rate = -1.0);

// WaveNet(WaveNet&&) = default;
// WaveNet& operator=(WaveNet&&) = default;
Expand Down