fix: predictDraw should give same result as python (#642)

* add tests for predictDraw to mirror python
* remove obsolete tests
* implementation of predictDraw
* add tests for predictDraw to mirror python
* merge mistake
* enforce always returning a number, even if NaN
philihp · Jul 31, 2024 · da4d3cd · da4d3cd
1 parent 6973ba1
commit da4d3cd
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 47 deletions.
diff --git a/eslint.config.js b/eslint.config.js
@@ -12,7 +12,7 @@ export default tseslint.config(
     extends: [eslint.configs.recommended, eslintPluginPrettierRecommended, ...tseslint.configs.recommended],
     rules: {
       '@typescript-eslint/no-unused-vars': [
-        'error',
+        'warn',
         {
           // allow unused variables if they begin with _
           argsIgnorePattern: '^_',

diff --git a/src/__tests__/predict-draw.test.ts b/src/__tests__/predict-draw.test.ts
@@ -1,39 +1,94 @@
 import { rating, predictDraw } from '..'
 
 describe('predictDraw', () => {
-  const precision = 6
+  it('if a tree falls in the forest', () => {
+    expect(predictDraw([])).toBe(Number.NaN)
+  })
 
-  const a1 = rating()
-  const a2 = rating({ mu: 32.444, sigma: 1.123 })
+  it('mirrors results from python', () => {
+    // from https://github.com/philihp/openskill.js/issues/599
+    const t1 = [rating({ mu: 25, sigma: 1 }), rating({ mu: 25, sigma: 1 })]
+    const t2 = [rating({ mu: 25, sigma: 1 }), rating({ mu: 25, sigma: 1 })]
+    expect(predictDraw([t1, t2])).toBe(0.2433180271619435)
+  })
 
-  const b1 = rating({ mu: 35.881, sigma: 0.0001 })
-  const b2 = rating({ mu: 25.188, sigma: 1.421 })
+  // We use toBeCloseTo in the tests below because the gaussian library we use in JS and
+  // Python's statistics.NormalDist differ slightly in their numerics, so the results only
+  // agree with the Python values up to a certain number of decimal places.
+  //
+  // This is known and accepted.
 
-  const team1 = [a1, a2]
-  const team2 = [b1, b2]
+  it('gives a low probability in a 5 player match', () => {
+    // from https://openskill.me/en/stable/manual.html
+    const p1 = rating({ mu: 35, sigma: 1.0 })
+    const p2 = rating({ mu: 35, sigma: 1.0 })
+    const p3 = rating({ mu: 35, sigma: 1.0 })
+    const p4 = rating({ mu: 35, sigma: 1.0 })
+    const p5 = rating({ mu: 35, sigma: 1.0 })
 
-  it('if a tree falls in the forest', () => {
-    expect.assertions(1)
-    expect(predictDraw([])).toBeUndefined()
+    const team1 = [p1, p2]
+    const team2 = [p3, p4, p5]
+    expect(predictDraw([team1, team2])).toBeCloseTo(0.0002807397636509501, 9)
+  })
+
+  it('gives a higher probability with fewer players', () => {
+    // from https://openskill.me/en/stable/manual.html
+    const p1 = rating({ mu: 35, sigma: 1.0 })
+    const p2 = rating({ mu: 35, sigma: 1.1 })
+    const team1 = [p1]
+    const team2 = [p2]
+    expect(predictDraw([team1, team2])).toBeCloseTo(0.4868868769871696, 8)
+  })
+
+  it('returns NaN when one team of nobody', () => {
+    // this could be undefined, but I think that makes more work for people to guard against that response,
+    // while a NaN tends to be passed along without halting.
+    expect(predictDraw([[]])).toBe(Number.NaN)
   })
 
-  it('predicts 100% draw for solitaire', () => {
-    expect.assertions(1)
-    expect(predictDraw([team1])).toBeCloseTo(1, precision)
+  it('returns NaN when two teams of nobody', () => {
+    expect(predictDraw([[], []])).toBe(Number.NaN)
   })
 
-  it('predicts 100% draw for self v self', () => {
-    expect.assertions(1)
-    expect(predictDraw([[b1], [b1]])).toBeCloseTo(1, precision)
+  it('returns NaN when only one team', () => {
+    const p1 = rating({ mu: 23.096623784758727, sigma: 8.138233582011868 })
+    const p2 = rating({ mu: 28.450555874288018, sigma: 8.156810439252277 })
+    expect(predictDraw([[p1, p2]])).toBe(Number.NaN)
   })
 
-  it('predicts draw for two teams', () => {
-    expect.assertions(1)
-    expect(predictDraw([team1, team2])).toBeCloseTo(0.7802613510294426, precision)
+  it('returns 1 when one team versus an empty team', () => {
+    const p2 = rating({ mu: 28.450555874288018, sigma: 8.156810439252277 })
+    expect(predictDraw([[p2], []])).toBe(1)
   })
 
-  it('predicts draw for three asymmetric teams', () => {
-    expect.assertions(1)
-    expect(predictDraw([team1, team2, [a1], [a2], [b1]])).toBeCloseTo(0.07517247728677093, precision)
+  describe('two game, 2v2 scenario with 5th defector', () => {
+    // these ratings come directly from python, where all players start out with baseline mu=25, sigma=25/3, then we do
+    // [[a,b,c], [d,e]] = rate([[a,b,c], [d,e]])
+    // [[a,b], [c,d,e]] = rate([[a,b], [c,d,e]])
+    const [a, b, c, d, _e] = [
+      rating({ mu: 28.450555874288018, sigma: 8.156810439252277 }),
+      rating({ mu: 28.450555874288018, sigma: 8.156810439252277 }),
+      rating({ mu: 23.096623784758727, sigma: 8.138233582011868 }),
+      rating({ mu: 21.537948364040137, sigma: 8.155255551436932 }),
+      rating({ mu: 21.537948364040137, sigma: 8.155255551436932 }),
+    ]
+
+    it('is a likely draw with the 5th sitting out', () => {
+      expect(
+        predictDraw([
+          [a, b],
+          [c, d],
+        ])
+      ).toBeCloseTo(0.09227283302635064, 7)
+    })
+
+    it('has draw probabilities with hypothetical mashups', () => {
+      expect(
+        predictDraw([
+          [a, c],
+          [b, d],
+        ])
+      ).toBeCloseTo(0.11489223845523855, 7)
+    })
   })
 })
diff --git a/src/predict-draw.ts b/src/predict-draw.ts
@@ -1,36 +1,40 @@
-import { flatten } from 'ramda'
+import { flatten, sum, map, addIndex, reduce, head } from 'ramda'
 import constants from './constants'
-import util, { sum } from './util'
+import util, { TeamRating } from './util'
 import { phiMajor, phiMajorInverse } from './statistics'
 import { Options, Team } from './types'
 
-const predictWin = (teams: Team[], options: Options = {}) => {
+const predictDraw = (teams: Team[], options: Options = {}): number => {
   const { teamRating } = util(options)
   const { BETASQ, BETA } = constants(options)
 
-  const n = teams.length
-  if (n === 0) return undefined
-  if (n === 1) return 1
+  const totalPlayerCount = flatten(teams).length
+  const drawProbability = 1 / totalPlayerCount
+  const drawMargin = Math.sqrt(totalPlayerCount) * BETA * phiMajorInverse((1 + drawProbability) / 2)
 
-  const denom = (n * (n - 1)) / (n > 2 ? 1 : 2)
-  const teamRatings = teamRating(teams)
-  const drawMargin = Math.sqrt(flatten(teams).length) * BETA * phiMajorInverse((1 + 1 / n) / 2)
+  const teamRatings = map<Team, TeamRating>((team) => head<TeamRating>(teamRating([team]))!, teams)
 
-  return (
-    Math.abs(
-      teamRatings
-        .map(([muA, sigmaSqA], i) =>
-          teamRatings
-            .filter((_, q) => i !== q)
-            .map(([muB, sigmaSqB]) => {
-              const sigmaBar = Math.sqrt(n * BETASQ + sigmaSqA + sigmaSqB)
-              return phiMajor((drawMargin - muA + muB) / sigmaBar) - phiMajor((muA - muB - drawMargin) / sigmaBar)
-            })
-        )
-        .flat()
-        .reduce(sum, 0)
-    ) / denom
+  const pairwiseProbs: number[] = addIndex<TeamRating, number[]>(reduce<TeamRating, number[]>)(
+    (outerAccum: number[], pairA: TeamRating, i: number): number[] => {
+      const [muA, sigmaSqA] = pairA
+      return reduce<TeamRating, number[]>(
+        (innerAccum: number[], pairB: TeamRating): number[] => {
+          const [muB, sigmaSqB] = pairB
+          const sharedDenom = Math.sqrt(totalPlayerCount * BETASQ + sigmaSqA + sigmaSqB)
+          innerAccum.push(
+            phiMajor((drawMargin - muA + muB) / sharedDenom) - phiMajor((muB - muA - drawMargin) / sharedDenom)
+          )
+          return innerAccum
+        },
+        outerAccum,
+        teamRatings.slice(i + 1)
+      )
+    },
+    [],
+    teamRatings
   )
+
+  return sum(pairwiseProbs) / pairwiseProbs.length
 }
 
-export default predictWin
+export default predictDraw