Merge pull request #3094 from coqui-ai/dev
v0.18.1
erogol authored Oct 21, 2023
2 parents 4ac821a + 1e15269 commit a0d530e
Showing 5 changed files with 20 additions and 13 deletions.
10 changes: 5 additions & 5 deletions TTS/.models.json
@@ -18,12 +18,12 @@
"xtts_v1.1": {
"description": "XTTS-v1.1 by Coqui with 14 languages, cross-language voice cloning and reference leak fixed.",
"hf_url": [
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/model.pth",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/config.json",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/vocab.json",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/hash.md5"
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/model.pth",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/config.json",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/vocab.json",
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.1/hash.md5"
],
"model_hash": "10163afc541dc86801b33d1f3217b456",
"model_hash": "ae9e4b39e095fd5728fe7f7931ec66ad",
"default_vocoder": null,
"commit": "82910a63",
"license": "CPML",
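For context (not part of the commit): the hf_url list now points at the re-uploaded v1.1.1 files, and model_hash changes to match the new checkpoint. Assuming model_hash is the MD5 digest of model.pth, which the hash.md5 entry alongside it suggests, a hand-rolled verification of a downloaded copy could look like the sketch below; the local path is a placeholder.

```python
import hashlib

# Placeholder path to a downloaded copy of the re-uploaded checkpoint.
MODEL_PATH = "model.pth"
EXPECTED_MD5 = "ae9e4b39e095fd5728fe7f7931ec66ad"  # model_hash value after this commit

md5 = hashlib.md5()
with open(MODEL_PATH, "rb") as f:
    # Hash in 1 MiB chunks to avoid loading the whole checkpoint into memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        md5.update(chunk)

print("hash ok" if md5.hexdigest() == EXPECTED_MD5 else "hash mismatch")
```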
2 changes: 1 addition & 1 deletion TTS/VERSION
@@ -1 +1 @@
- 0.18.0
+ 0.18.2
6 changes: 3 additions & 3 deletions TTS/tts/configs/xtts_config.py
@@ -78,13 +78,13 @@ class XttsConfig(BaseTTSConfig):
)

# inference params
- temperature: float = 0.2
+ temperature: float = 0.85
length_penalty: float = 1.0
repetition_penalty: float = 2.0
top_k: int = 50
- top_p: float = 0.8
+ top_p: float = 0.85
cond_free_k: float = 2.0
diffusion_temperature: float = 1.0
- num_gpt_outputs: int = 16
+ num_gpt_outputs: int = 1
decoder_iterations: int = 30
decoder_sampler: str = "ddim"
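These new defaults (higher temperature and top_p, a single GPT output instead of 16) are what XttsConfig now yields out of the box, and they remain plain config fields that can be tuned per run. A minimal sketch, assuming only the import path shown in this diff:

```python
from TTS.tts.configs.xtts_config import XttsConfig

config = XttsConfig()
# After this change the defaults are: temperature=0.85, top_p=0.85, num_gpt_outputs=1.
print(config.temperature, config.top_p, config.num_gpt_outputs)

# Override per experiment before running inference if needed.
config.temperature = 0.65
config.top_k = 30
```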
4 changes: 3 additions & 1 deletion TTS/tts/layers/xtts/tokenizer.py
@@ -6,7 +6,6 @@
from tokenizers import Tokenizer

import pypinyin
- import cutlet
from num2words import num2words
from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words

@@ -484,10 +483,13 @@ def preprocess_text(self, txt, lang):
if lang == "zh-cn":
    txt = chinese_transliterate(txt)
elif lang == "ja":
    assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
    txt = txt[4:]
    if self.katsu is None:
+       import cutlet
        self.katsu = cutlet.Cutlet()
    txt = japanese_cleaners(txt, self.katsu)
    txt = "[ja]" + txt
else:
    raise NotImplementedError()
return txt
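Moving the import inside the Japanese branch means cutlet (and its MeCab/dictionary dependencies) is only loaded when Japanese text is actually preprocessed, so other languages work without those packages installed. A stripped-down illustration of the same lazy-initialization pattern, independent of the real VoiceBpeTokenizer class:

```python
class LazyJapaneseConverter:
    """Illustrative only: create the cutlet converter on first use."""

    def __init__(self):
        self.katsu = None  # heavy dependency not touched yet

    def romanize(self, text: str) -> str:
        if self.katsu is None:
            import cutlet  # imported only when Japanese text is requested
            self.katsu = cutlet.Cutlet()
        return self.katsu.romaji(text)
```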
11 changes: 8 additions & 3 deletions TTS/tts/models/xtts.py
@@ -821,8 +821,6 @@ def load_checkpoint(
self.tokenizer = VoiceBpeTokenizer(vocab_file=vocab_path)

self.init_models()
- if eval:
-     self.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache)

checkpoint = load_fsspec(model_path, map_location=torch.device("cpu"))["model"]
ignore_keys = ["diffusion_decoder", "vocoder"] if self.args.use_hifigan or self.args.use_ne_hifigan else []
@@ -831,7 +829,14 @@
for key in list(checkpoint.keys()):
    if key.split(".")[0] in ignore_keys:
        del checkpoint[key]
- self.load_state_dict(checkpoint, strict=strict)

+ # deal with v1 and v1.1. V1 has the init_gpt_for_inference keys, v1.1 do not
+ try:
+     self.load_state_dict(checkpoint, strict=strict)
+ except:
+     if eval:
+         self.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache)
+     self.load_state_dict(checkpoint, strict=strict)

if eval:
    if hasattr(self, "hifigan_decoder"): self.hifigan_decoder.eval()
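With the strict load wrapped in a try/except, the same load_checkpoint call should handle both XTTS v1 checkpoints (which carry the inference-time GPT keys) and v1.1 checkpoints (which do not); eval=True is what lets the except branch add the inference-time GPT modules before retrying the load. A usage sketch in the spirit of the XTTS loading flow; the paths below are placeholders:

```python
import torch

from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

config = XttsConfig()
config.load_json("/path/to/xtts/config.json")  # placeholder path
model = Xtts.init_from_config(config)
# Works for v1 and v1.1 checkpoints thanks to the strict-load fallback above.
model.load_checkpoint(config, checkpoint_dir="/path/to/xtts/", eval=True)
if torch.cuda.is_available():
    model.cuda()
```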
