Skip to content

Commit

Permalink
feat: ensure that the generated short link is unique when the origina…
Browse files Browse the repository at this point in the history
…l url is same (#25)
  • Loading branch information
beihai0xff authored Jul 6, 2024
1 parent 6e74711 commit 3db9ca1
Show file tree
Hide file tree
Showing 16 changed files with 10,312 additions and 43 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Tiny-URL 短链接服务
- [x] URL 编码:支持 Base58 编码;
- [x] 限流器:支持 Redis 与单机令牌桶限流器;
- [x] 读写分离:只读/只写/读写模式运行;
- [ ] 幂等:同一 URL 多次生成,需要保证生成的短链接是唯一的
- [x] 幂等:同一 URL 多次生成,需要保证生成的短链接是唯一的
- [ ] 过期时间:支持短链接过期时间;
- [ ] 可观测:API 访问数据、服务监控;

Expand Down Expand Up @@ -97,3 +97,12 @@ curl -L http://localhost/24rgcX
* 数据库读写请求数:平均 116 QPS 的写入操作与读取操作,峰值 1k QPS 的写入操作与读取操作;
* 缓存服务器内存空间:总共需要 50GB 内存空间,缓存约 1亿条数据;
* 本地缓存存储空间:每台 Server 节点需要 500MB 内存空间用于本地缓存,缓存约 1M 条数据;

## 更多设计细节

* [短链接服务系统设计](docs/system-design.md)
* [Base58 编码算法](docs/base58-design.md)
* [分布式 ID 生成器](docs/tddl-design.md)
* [限流器设计](docs/rate-limiter-design.md)
* [API 性能测试](docs/api-benchmark.md)
* [数据库表结构](docs/ddl)
17 changes: 14 additions & 3 deletions app/turl/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,22 @@ func (c *commandService) Create(ctx context.Context, long []byte) ([]byte, error
}

if err = c.db.Insert(ctx, seq, long); err != nil {
return nil, fmt.Errorf("failed to insert into db: %w", err)
if errors.Is(err, gorm.ErrDuplicatedKey) {
slog.Error(fmt.Sprintf("failed to insert into db: %v, try to get from db", err),
slog.Any("long url", long), slog.Int64("seq", int64(seq)))

record, err := c.db.GetByLongURL(ctx, long)
if err != nil {
return nil, fmt.Errorf("failed to get from db: %w", err)
}

seq = record.Short
} else {
return nil, fmt.Errorf("failed to insert into db: %w", err)
}
}

short := mapping.Base58Encode(seq)

// set local cache and distributed cache, if failed, just log the error, not return err
if err = c.cache.Set(ctx, string(short), long, c.ttl); err != nil {
slog.ErrorContext(ctx, "failed to set cache", slog.Any("error", err))
Expand Down Expand Up @@ -200,7 +211,7 @@ func (q *queryService) Retrieve(ctx context.Context, short []byte) ([]byte, erro
}()

// try to get from db
res, err := q.db.GetTinyURLByID(ctx, seq)
res, err := q.db.GetByShortID(ctx, seq)
if err != nil {
return nil, err
}
Expand Down
15 changes: 13 additions & 2 deletions app/turl/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ func TestService_Create(t *testing.T) {
require.Error(t, err)
require.Nil(t, short)
})

t.Run("CreateExistingURL", func(t *testing.T) {
short, err := turl.Create(context.Background(), []byte("https://www.CreateExistingURL.com"))
require.NoError(t, err)
require.NotNil(t, short)

short2, err := turl.Create(context.Background(), []byte("https://www.CreateExistingURL.com"))
require.NoError(t, err)
require.NotNil(t, short2)
require.Equal(t, short, short2)
})
}

func TestService_Retrieve(t *testing.T) {
Expand Down Expand Up @@ -128,7 +139,7 @@ func TestService_Retrieve_failed(t *testing.T) {

t.Run("GetFailedToGetFromStorage", func(t *testing.T) {
mockCache.EXPECT().Get(mock.Anything, mock.Anything).Return(nil, cache.ErrCacheMiss).Times(1)
mockStorage.EXPECT().GetTinyURLByID(mock.Anything, uint64(38068692543)).Return(nil, testErr).Times(1)
mockStorage.EXPECT().GetByShortID(mock.Anything, uint64(38068692543)).Return(nil, testErr).Times(1)

got, err := turl.Retrieve(context.Background(), []byte("zzzzzz"))
require.ErrorIs(t, err, testErr)
Expand All @@ -137,7 +148,7 @@ func TestService_Retrieve_failed(t *testing.T) {

t.Run("RetrieveFailedToSetCache", func(t *testing.T) {
mockCache.EXPECT().Get(mock.Anything, "zzzzzz").Return(nil, cache.ErrCacheMiss).Times(1)
mockStorage.EXPECT().GetTinyURLByID(mock.Anything, uint64(38068692543)).Return(&storage.TinyURL{LongURL: []byte("https://www.example.com")}, nil).Times(1)
mockStorage.EXPECT().GetByShortID(mock.Anything, uint64(38068692543)).Return(&storage.TinyURL{LongURL: []byte("https://www.example.com")}, nil).Times(1)
mockCache.EXPECT().Set(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(testErr).Times(1)

got, err := turl.Retrieve(context.Background(), []byte("zzzzzz"))
Expand Down
36 changes: 36 additions & 0 deletions docs/api-benchmark.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

# 性能测试

对 API 的性能测试,主要是为了验证 API 的性能是否满足需求,以及在不同的并发情况下,API 的性能表现。

## 测试资源

服务器:Apple MacBook Pro14 M1 Pro 2021 16G 512G SSD

## 测试方式

获取全球访问量前 10k 的域名,每个域名添加 10 个 API 后缀,共计 100k 条无重复数据。使用 100 个协程并发请求,统计写入耗时。

```shell
Benchmark_Create
api_benchmark_test.go:93: send requests: 4550
api_benchmark_test.go:93: send requests: 10040
api_benchmark_test.go:93: send requests: 15422
api_benchmark_test.go:93: send requests: 20977
api_benchmark_test.go:93: send requests: 26532
api_benchmark_test.go:93: send requests: 32452
api_benchmark_test.go:93: send requests: 38095
api_benchmark_test.go:93: send requests: 42921
api_benchmark_test.go:93: send requests: 47579
api_benchmark_test.go:93: send requests: 52629
api_benchmark_test.go:93: send requests: 58464
api_benchmark_test.go:93: send requests: 63961
api_benchmark_test.go:93: send requests: 69651
api_benchmark_test.go:93: send requests: 75325
api_benchmark_test.go:93: send requests: 80952
api_benchmark_test.go:93: send requests: 87016
api_benchmark_test.go:93: send requests: 92535
api_benchmark_test.go:93: send requests: 98344
api_benchmark_test.go:111: success requests: 100000 costs 18.365219458s
```
综合来看,API 的性能表现良好,能够达到 5000+ QPS 的写入速度。
14 changes: 14 additions & 0 deletions docs/base58-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@

Base58 是一种用于表示大整数的编码方式,常用于各种加密货币(如比特币)的地址和其他需要短且易于阅读的标识符。它使用58个字符(0-9,A-Z,a-z,去掉了0,O,I和l)来表示整数。

在短链接应用场景下,我们可以使用 Base58 编码来生成短链接。我们的实现中,我们将一个 `uint64` 类型的数字编码为 base58 字符串,其中 1~8位 58 进制可以表示的数字范围为:
```
58^1 = 58
58^2 = 3364
58^3 = 195112
58^4 = 11316496
58^5 = 656356768
58^6 = 38068692544
58^7 = 2216436115296
58^8 = 128063081718016
```

即 6 位 base58 编码可以表示的数字范围为 656356768~38068692543(即 58^5 ~ 58^6 - 1),共约 374 亿个,足够我们的短链接服务使用。

## 设计

在我们的实现中,我们提供了两个主要的函数:`Base58Encode` 和 `Base58Decode`。
Expand Down
18 changes: 18 additions & 0 deletions docs/ddl/sequences.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- auto-generated definition
-- Table `sequences`: one row per named sequence; `name` is unique across the table.
-- NOTE(review): presumably the backing store for the distributed ID generator — confirm against the code that reads it.
create table sequences
(
-- Surrogate primary key, assigned by the database.
id bigint unsigned auto_increment
primary key,
-- Nullable, millisecond-precision row timestamps.
-- NOTE(review): looks like the ORM's standard model columns (deleted_at used for soft delete) — confirm.
created_at datetime(3) null,
updated_at datetime(3) null,
deleted_at datetime(3) null,
-- Identifier of the sequence; required and constrained unique below.
name varchar(500) not null,
-- NOTE(review): presumably the current (or next) value handed out for this name — confirm.
sequence bigint not null,
-- NOTE(review): presumably an optimistic-locking version counter — confirm.
version bigint null,
-- Rejects a second row with the same name.
constraint idx_sequences_name
unique (name)
);

-- Secondary index on deleted_at.
create index idx_sequences_deleted_at
on sequences (deleted_at);

19 changes: 19 additions & 0 deletions docs/ddl/tiny_urls.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- auto-generated definition
-- Table `tiny_urls`: maps a long URL to its numeric short identifier.
-- Both directions of the mapping are unique: one row per long_url and one row per short.
create table tiny_urls
(
-- Surrogate primary key, assigned by the database.
id bigint unsigned auto_increment
primary key,
-- Nullable, millisecond-precision row timestamps.
-- NOTE(review): looks like the ORM's standard model columns (deleted_at used for soft delete) — confirm.
created_at datetime(3) null,
updated_at datetime(3) null,
deleted_at datetime(3) null,
-- Original URL; required, at most 500 characters.
long_url varchar(500) not null,
-- Numeric short identifier stored as an integer.
-- NOTE(review): presumably the value that is Base58-encoded into the public short link — confirm.
short bigint not null,
-- Inserting an already-stored long_url violates this constraint (duplicate key).
constraint idx_tiny_urls_long_url
unique (long_url),
-- A short identifier can belong to only one row.
constraint idx_tiny_urls_short
unique (short)
);

-- Secondary index on deleted_at.
create index idx_tiny_urls_deleted_at
on tiny_urls (deleted_at);

21 changes: 1 addition & 20 deletions docs/system-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,28 +56,9 @@

短链接服务需要存储短链接与长链接的映射关系,可以选择关系型数据库、NoSQL 数据库等,turl 首要支持 MySQL 等关系型数据库,未来考虑支持 MongoDB 等 NoSQL 数据库。

MySQL 数据库表设计如下:
MySQL 数据库表结构可参考 [docs/ddl/tiny_urls.sql](ddl/tiny_urls.sql)

```sql
create table turl.tiny_urls
(
id bigint unsigned auto_increment
primary key,
created_at datetime(3) null,
updated_at datetime(3) null,
deleted_at datetime(3) null,
long_url varchar(500) not null,
short bigint not null
);

create index idx_tiny_urls_deleted_at
on turl.tiny_urls (deleted_at);

create index idx_tiny_urls_short
on turl.tiny_urls (short);


```



112 changes: 112 additions & 0 deletions internal/tests/benchmark/api_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package benchmark

import (
"bufio"
"crypto/sha256"
"fmt"
"io"
"log"
"net/http"
"os"
"strconv"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)

const addr = "http://localhost:8080/api/shorten"

// readDomain loads the benchmark input from "domain.txt" in the current
// working directory and returns its lines in file order, one entry per line.
//
// Any failure to open or scan the file aborts the calling benchmark through
// b.Fatal, so the caller never receives an empty or truncated list that was
// caused by an I/O error.
func readDomain(b *testing.B) []string {
	b.Helper()

	file, err := os.Open("domain.txt")
	if err != nil {
		// Fatal rather than Error: carrying on would scan a nil file and
		// silently run the benchmark with no input at all.
		b.Fatal(err)
	}
	defer file.Close()

	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	// Scan reports false for both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		b.Fatal(err)
	}

	return lines
}

// Benchmark_Create measures the end-to-end write throughput of the shorten
// API served at addr.
//
// For every domain returned by readDomain it builds 10 distinct long URLs
// (the suffix is the SHA-256 of the round index) and posts them as JSON using
// 100 concurrent workers. The number of successful requests is logged once
// per second while the run is in progress, and the total together with the
// elapsed wall-clock time is logged at the end.
//
// The workload size is fixed by the input file; b.N is not used.
func Benchmark_Create(b *testing.B) {
	domains := readDomain(b)

	const maxRequest = 100
	var (
		wg    sync.WaitGroup
		count atomic.Int64
	)
	ch := make(chan string, maxRequest)

	client := &http.Client{
		Transport: &http.Transport{
			MaxIdleConns:    200,
			MaxConnsPerHost: 200,
			IdleConnTimeout: 10 * time.Second,
		},
		// Bound every request so a stalled server cannot hang the benchmark forever.
		Timeout: 30 * time.Second,
	}

	wg.Add(maxRequest)
	for range maxRequest {
		go func() {
			defer wg.Done()

			// Workers drain the channel and exit once it is closed and empty,
			// so every queued URL is sent before wg.Wait returns.
			for url := range ch {
				// Wrapped in a closure so the deferred Close runs per request.
				func() {
					resp, err := client.Post(addr, "application/json", strings.NewReader(fmt.Sprintf(`{"long_url": "%s"}`, url)))
					if err != nil {
						b.Error(err)
						return
					}
					defer resp.Body.Close()

					body, err := io.ReadAll(resp.Body)
					if err != nil {
						// NOTE(review): this terminates the whole process from a
						// worker goroutine; consider reporting through b.Error instead.
						log.Fatalln(err)
					}
					if resp.StatusCode != http.StatusOK {
						b.Errorf("Error: %d, resp body: %s", resp.StatusCode, body)
						return
					}

					count.Add(1)
				}()
			}
		}()
	}

	b.ReportAllocs()
	b.ResetTimer()

	// Progress reporter. It must be stopped and awaited before this function
	// returns: logging on b after the benchmark has completed panics, and an
	// unstopped ticker goroutine would otherwise live forever.
	done := make(chan struct{})
	var reporter sync.WaitGroup
	reporter.Add(1)
	go func() {
		defer reporter.Done()

		ticker := time.NewTicker(time.Second)
		defer ticker.Stop()

		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				b.Log("send requests:", count.Load())
			}
		}
	}()

	start := time.Now()
	for i := range 10 {
		s := sha256.Sum256([]byte(strconv.Itoa(i)))
		for j := range len(domains) {
			ch <- fmt.Sprintf("https://%s/test/api/image/1234567890/%x", domains[j], s)
		}
	}

	// Closing the channel is the stop signal; no polling of len(ch) is needed.
	close(ch)
	wg.Wait()

	close(done)
	reporter.Wait()

	b.Log("success requests: ", count.Load(), "costs", time.Since(start).String())
}
Loading

0 comments on commit 3db9ca1

Please sign in to comment.