Skip to content

Commit

Permalink
feat✨(audio): add audio api
Browse files Browse the repository at this point in the history
Signed-off-by: zjzjzjzj1874 <[email protected]>
  • Loading branch information
zjzjzjzj1874 committed Mar 28, 2023
1 parent 2db6e5d commit a8976cc
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 3 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
chatgpt
.idea
cmd/gptx
cmd/gptx

cmd/*.mp3
cmd/*.mp4
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ Usage:
gptx [command]

Available Commands:
audio turn audio into text.
chat creates a completion for the chat message
help Help about any command
img creates an image given a prompt.
model lists the currently available models
model lists the currently available models

Flags:
-h, --help help for gptx
Expand Down Expand Up @@ -70,6 +71,16 @@ Total Image: 2
Url: https://oaidalleapiprodscus.blob.core.windows.net/private/org-FszeU94XqTOxWst1f2mp5LpO/user-qcjpFAv1q7NKNH42MHry25KB/img-r3lAOCz0DSmypxl3X5w3ZWyE.png?st=2023-03-24T05%3A27%3A14Z&se=2023-03-24T07%3A27%3A14Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-03-23T22%3A08%3A23Z&ske=2023-03-24T22%3A08%3A23Z&sks=b&skv=2021-08-06&sig=%2BaFB5nW23BeT6XGdrcSS1M2wvWeWbywJnebdp9wdza8%3D
Url: https://oaidalleapiprodscus.blob.core.windows.net/private/org-FszeU94XqTOxWst1f2mp5LpO/user-qcjpFAv1q7NKNH42MHry25KB/img-r3XgIswuunVwZ6NlwP0NnUAG.png?st=2023-03-24T05%3A27%3A14Z&se=2023-03-24T07%3A27%3A14Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-03-23T22%3A08%3A23Z&ske=2023-03-24T22%3A08%3A23Z&sks=b&skv=2021-08-06&sig=nvVZDD3hsaxPtaS9sxyfvwr2x7u0mF4/9cbts8t60I0%3D

```
### 音频转文字

```Bash
-f: 待转文件
-m: gpt模型,默认使用whisper-1
-l: 语言,参考 https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
./gptx audio trans -f 5.6.mp3 -l en

翻译结果:John, John, you are so dumb. John, John, you are so dumb. John, John, you are so dumb. John, John, you are so dumb.
```

## TODO list
Expand Down
28 changes: 28 additions & 0 deletions cmd/audio/audio.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package audio

import (
"github.com/spf13/cobra"
)

// defaultModel is the model ID used as the default value of the --model flag.
const defaultModel = "whisper-1"

// Values bound to the command-line flags of the audio sub-commands.
var (
	// file is the audio file to send.
	file string

	// model is the ID of the model to use.
	model string

	// prompt is an optional text passed along with the audio.
	prompt string

	// language is the language of the input audio. Supplying it in ISO-639-1
	// format will improve accuracy and latency.
	// See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
	language string
)

// init attaches the translation (transCmd) and transcription (transcCmd)
// sub-commands to the audio command, in that order.
func init() {
	Cmd.AddCommand(transCmd, transcCmd)
}

// Cmd is the "audio" parent command. It has no Run function of its own; the
// package's init function adds the sub-commands to it.
var Cmd = &cobra.Command{
	Use:   "audio",
	Short: "turn audio into text.",
}
80 changes: 80 additions & 0 deletions cmd/audio/transcription.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package audio

import (
"bytes"
"io"
"mime/multipart"
"net/http"
"os"

"github.com/fatih/color"
"github.com/spf13/cobra"

"github.com/zjzjzjzj1874/chatgpt/pkg"
)

// init registers the flags of the transc command and marks --file as required.
func init() {
	flags := transcCmd.Flags()
	flags.StringVarP(&file, "file", "f", "", "The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.")
	flags.StringVarP(&model, "model", "m", defaultModel, "ID of the model to use. Only whisper-1 is currently available.")
	flags.StringVarP(&prompt, "prompt", "p", "", "An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.")
	flags.StringVarP(&language, "language", "l", "", "The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.")

	// A "required together" group with a single member is always satisfied,
	// so it never made --file mandatory. MarkFlagRequired does. It only
	// returns an error when the flag is unknown, which cannot happen here
	// because the flag is registered just above.
	_ = transcCmd.MarkFlagRequired("file")
}

// transcCmd uploads the audio file named by --file as multipart/form-data to
// pkg.AUDIO_TRANSCRIPTION_URL and prints the text found in the response.
// The model, prompt and language form fields are only sent when non-empty.
// Every failure is reported in red on the terminal and aborts the command.
var transcCmd = &cobra.Command{
	Use:   "transc",
	Short: "Transcribes audio into the input language.",
	Run: func(cmd *cobra.Command, args []string) {
		if len(file) == 0 {
			color.Red("%s", "Please input your file")
			return
		}
		fi, err := os.Open(file)
		if err != nil {
			color.Red("Open file(%s) failure:%s", file, err.Error())
			return
		}
		// Release the file handle on every exit path. The file is only read,
		// so the error returned by Close carries no useful information.
		defer fi.Close()

		body := new(bytes.Buffer)
		writer := multipart.NewWriter(body)
		part, err := writer.CreateFormFile("file", file)
		if err != nil {
			color.Red("CreateFormFile file(%s) failure:%s", file, err.Error())
			return
		}
		if _, err = io.Copy(part, fi); err != nil {
			color.Red("Copy file(%s) failure:%s", file, err.Error())
			return
		}

		// Optional form fields: skipped when empty, reported when they cannot
		// be written instead of being silently dropped.
		fields := []struct{ name, value string }{
			{"model", model},
			{"prompt", prompt},
			{"language", language},
		}
		for _, f := range fields {
			if len(f.value) == 0 {
				continue
			}
			if err = writer.WriteField(f.name, f.value); err != nil {
				color.Red("Write field(%s) failure:%s", f.name, err.Error())
				return
			}
		}
		// Close writes the trailing boundary; a failure here means the body
		// is not a complete multipart message and must not be sent.
		if err = writer.Close(); err != nil {
			color.Red("Close multipart writer failure:%s", err.Error())
			return
		}

		client, err := pkg.NewClient(pkg.WithMethod(http.MethodPost), pkg.WithContentType(writer.FormDataContentType()), pkg.WithUrl(pkg.AUDIO_TRANSCRIPTION_URL), pkg.WithBody(body))
		if err != nil {
			color.Red("New Client Err:%s", err.Error())
			return
		}

		var resp pkg.AudioTranslationResponse
		if err = client.Send(&resp); err != nil {
			color.Red("Send Err:%s", err.Error())
			return
		}

		color.Cyan("转录结果:%s", resp.Text)
	},
}
80 changes: 80 additions & 0 deletions cmd/audio/translation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package audio

import (
"bytes"
"io"
"mime/multipart"
"net/http"
"os"

"github.com/fatih/color"
"github.com/spf13/cobra"

"github.com/zjzjzjzj1874/chatgpt/pkg"
)

// init registers the flags of the trans command and marks --file as required.
func init() {
	flags := transCmd.Flags()
	flags.StringVarP(&file, "file", "f", "", "The audio file to translate, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.")
	flags.StringVarP(&model, "model", "m", defaultModel, "ID of the model to use. Only whisper-1 is currently available.")
	flags.StringVarP(&prompt, "prompt", "p", "", "An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.")
	flags.StringVarP(&language, "language", "l", "", "The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.")

	// A "required together" group with a single member is always satisfied,
	// so it never made --file mandatory. MarkFlagRequired does. It only
	// returns an error when the flag is unknown, which cannot happen here
	// because the flag is registered just above.
	_ = transCmd.MarkFlagRequired("file")
}

// transCmd uploads the audio file named by --file as multipart/form-data to
// pkg.AUDIO_TRANSLATION_URL and prints the text found in the response.
// The model, prompt and language form fields are only sent when non-empty.
// Every failure is reported in red on the terminal and aborts the command.
var transCmd = &cobra.Command{
	Use: "trans",
	// The original help text contained a duplicated word ("into into").
	Short: "Translates audio into text.",
	Run: func(cmd *cobra.Command, args []string) {
		if len(file) == 0 {
			color.Red("%s", "Please input your file")
			return
		}
		fi, err := os.Open(file)
		if err != nil {
			color.Red("Open file(%s) failure:%s", file, err.Error())
			return
		}
		// Release the file handle on every exit path. The file is only read,
		// so the error returned by Close carries no useful information.
		defer fi.Close()

		body := new(bytes.Buffer)
		writer := multipart.NewWriter(body)
		part, err := writer.CreateFormFile("file", file)
		if err != nil {
			color.Red("CreateFormFile file(%s) failure:%s", file, err.Error())
			return
		}
		if _, err = io.Copy(part, fi); err != nil {
			color.Red("Copy file(%s) failure:%s", file, err.Error())
			return
		}

		// Optional form fields: skipped when empty, reported when they cannot
		// be written instead of being silently dropped.
		fields := []struct{ name, value string }{
			{"model", model},
			{"prompt", prompt},
			{"language", language},
		}
		for _, f := range fields {
			if len(f.value) == 0 {
				continue
			}
			if err = writer.WriteField(f.name, f.value); err != nil {
				color.Red("Write field(%s) failure:%s", f.name, err.Error())
				return
			}
		}
		// Close writes the trailing boundary; a failure here means the body
		// is not a complete multipart message and must not be sent.
		if err = writer.Close(); err != nil {
			color.Red("Close multipart writer failure:%s", err.Error())
			return
		}

		client, err := pkg.NewClient(pkg.WithMethod(http.MethodPost), pkg.WithContentType(writer.FormDataContentType()), pkg.WithUrl(pkg.AUDIO_TRANSLATION_URL), pkg.WithBody(body))
		if err != nil {
			color.Red("New Client Err:%s", err.Error())
			return
		}

		var resp pkg.AudioTranslationResponse
		if err = client.Send(&resp); err != nil {
			// Message aligned with the transc command; the previous
			// "Send Chat Err" text was copied from the chat command.
			color.Red("Send Err:%s", err.Error())
			return
		}

		color.Cyan("翻译结果:%s", resp.Text)
	},
}
2 changes: 2 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"context"
"github.com/fatih/color"
"github.com/zjzjzjzj1874/chatgpt/cmd/audio"
"github.com/zjzjzjzj1874/chatgpt/cmd/image"
"os"

Expand All @@ -24,6 +25,7 @@ func init() {
rootCmd.AddCommand(chat.Cmd)
rootCmd.AddCommand(model.Cmd)
rootCmd.AddCommand(image.Cmd)
rootCmd.AddCommand(audio.Cmd)
rootCmd.CompletionOptions.DisableDefaultCmd = true
}

Expand Down
27 changes: 26 additions & 1 deletion pkg/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type Client struct {
method string // 请求方法
url string // 请求url
body interface{} // 请求body
contentType string // 类型
}

// Option is a function that is handed a *Client and may modify it; NewClient
// accepts a variadic list of Options.
type Option func(*Client)
Expand All @@ -29,6 +30,12 @@ func WithPrompt(prompt string) Option {
}
}

// WithContentType returns an Option that stores contentType in the Client's
// contentType field.
func WithContentType(contentType string) Option {
	assign := func(target *Client) {
		target.contentType = contentType
	}
	return assign
}

func WithMethod(method string) Option {
return func(c *Client) {
c.method = method
Expand Down Expand Up @@ -58,6 +65,15 @@ func (c *Client) PreNewClient() {
if c.clientTimeoutSec <= 0 {
c.clientTimeoutSec = default_timeout
}
if c.contentType == "" {
}
}

// PostClient applies settings after the underlying client has been built:
// when a content type was configured, it is installed on c.Client through
// SetCommonContentType. With an empty content type it does nothing.
func (c *Client) PostClient() {
	if c.contentType == "" {
		return
	}
	c.Client = c.Client.SetCommonContentType(c.contentType)
}

func NewClient(opts ...Option) (client *Client, err error) {
Expand All @@ -76,6 +92,7 @@ func NewClient(opts ...Option) (client *Client, err error) {
SetTimeout(time.Duration(client.clientTimeoutSec) * time.Second).
SetCommonBearerAuthToken(key).
SetCommonContentType("application/json; charset=utf-8")
client.PostClient()
return
}

Expand All @@ -85,7 +102,15 @@ func (c *Client) Send(src interface{}) (err error) {
if c.body != nil {
request = request.SetBody(c.body)
}
respErr := ResponseErr{}
resp, err := request.SetSuccessResult(src).SetErrorResult(&respErr).Send(c.method, c.url)
if resp.IsErrorState() {
return respErr.Error
}

_, err = request.SetSuccessResult(src).Send(c.method, c.url)
// TODO add a debug var to print blow info
//color.Cyan("Resp:%v", src)
//res, _ := json.Marshal(src)
//color.Cyan("Total Res:%v", string(res))
return
}
33 changes: 33 additions & 0 deletions pkg/gpt.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,28 @@ const (
GPT_URL = "https://api.openai.com/v1/chat/completions" // POST&GET:和gpt进行聊天
MODEL_URL = "https://api.openai.com/v1/models" // GET:请求模型列表
IMG_CREATE_URL = "https://api.openai.com/v1/images/generations" // POST:图片生成

// The translation constant previously pointed at the transcriptions
// endpoint, so the "trans" command actually performed a transcription.
AUDIO_TRANSLATION_URL = "https://api.openai.com/v1/audio/translations" // POST: audio translation
AUDIO_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions" // POST: audio transcription
)

// ResponseErr is the envelope of an error reply: the details are decoded from
// the "error" key of the JSON body.
type ResponseErr struct {
	Error RespErr `json:"error"`
}

// RespErr holds the fields of the "error" object. Param and Code are left
// untyped, so any JSON value is accepted for them.
type RespErr struct {
	Message string      `json:"message"`
	Type    string      `json:"type"`
	Param   interface{} `json:"param"`
	Code    interface{} `json:"code"`
}

// Error returns the Message field, which makes RespErr usable as an error.
func (e RespErr) Error() string {
	return e.Message
}

type Text2Cmd struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
Expand Down Expand Up @@ -101,3 +121,16 @@ type (
URL string `json:"url"`
}
)

// AudioTranslationRequest lists the parameters of an audio request as plain
// strings with their JSON names.
type AudioTranslationRequest struct {
	File     string `json:"file"`
	Model    string `json:"model"`
	Prompt   string `json:"prompt"`
	Language string `json:"language" description:"The language of the input audio"`
}

// AudioTranslationResponse is the decoded reply of an audio request; Text is
// read from the "text" key.
type AudioTranslationResponse struct {
	Text string `json:"text"`
}

0 comments on commit a8976cc

Please sign in to comment.