Skip to content

Commit

Permalink
Merge pull request #1 from NickHeiner/master
Browse files Browse the repository at this point in the history
Merge back the one other change and follow Nick in moving to version 1.2 for the feature addition.
  • Loading branch information
syonfox authored Dec 25, 2022
2 parents 473daf7 + b779e40 commit bfefccc
Show file tree
Hide file tree
Showing 5 changed files with 5,851 additions and 13 deletions.
8 changes: 4 additions & 4 deletions Encoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ const byte_decoder = {}
Object.keys(byte_encoder).map(x => { byte_decoder[byte_encoder[x]] = x })

const bpe_ranks = dictZip(bpe_merges, range(0, bpe_merges.length))
const cache = {}
const cache = new Map;

function bpe(token) {
if (token in cache) {
return cache[token]
if (cache.has(token)) {
return cache.get(token)
}``

let word = token.split('')
Expand Down Expand Up @@ -147,7 +147,7 @@ function bpe(token) {
}

word = word.join(' ')
cache[token] = word
cache.set(token, word)

return word
}
Expand Down
9 changes: 8 additions & 1 deletion Encoder.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,11 @@ test('emojis', () => {
const str = "hello 👋 world 🌍";
expect(encode(str)).toEqual([31373, 50169, 233, 995, 12520, 234, 235])
expect(decode(encode(str))).toEqual(str)
});
});

test('properties of Object',()=>{
const str = "toString constructor hasOwnProperty valueOf";

expect(encode(str)).toEqual([1462, 10100, 23772, 468, 23858, 21746, 1988, 5189]);
expect(decode(encode(str))).toEqual(str);
})
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# This is a fork of https://github.com/latitudegames/GPT-3-Encoder. I made this fork so I could apply some PRs that had been sent to the upstream repo.

~~~
# GPT-3-Encoder
Javascript BPE Encoder Decoder for GPT-2 / GPT-3
Expand Down
Loading

0 comments on commit bfefccc

Please sign in to comment.