Skip to content

Commit

Permalink
feat: compile collected basic blocks (python#62)
Browse files Browse the repository at this point in the history
* fix: disable BB_TEST_ITER in stencil compiler

* feat: compile collected basic blocks

* fix: memleak

* feat: compile trace exits too

* nit: logging when JIT_DEBUG isn't set

* nit: fix warning

---------

Co-authored-by: Jules <[email protected]>
  • Loading branch information
Fidget-Spinner and JuliaPoo authored Jul 12, 2023
1 parent 22580dc commit 2d04e2e
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 21 deletions.
2 changes: 2 additions & 0 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ typedef struct _PyTier2BBMetadata {
_Py_CODEUNIT *tier2_start;
// Note, this is the first tier 1 instruction to execute AFTER the BB ends.
_Py_CODEUNIT *tier1_end;
// Tier 2.5 machine code function trampoline pointer
void *machine_code;
} _PyTier2BBMetadata;

// Bump allocator for basic blocks (overallocated)
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ typedef struct {
} _PyBBBranchCache;

#define INLINE_CACHE_ENTRIES_BB_BRANCH CACHE_ENTRIES(_PyBBBranchCache)

#define INLINE_CACHE_ENTRIES_JUMP_BACKWARD CACHE_ENTRIES(_PyBBBranchCache)


/* PEP 659
* Specialization and quickening structs and helper functions
*/
Expand Down Expand Up @@ -279,7 +279,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
#define BB_TEST_IS_SUCCESSOR(frame) ((frame->bb_test) >> 4)
#define BB_TEST_GET_N_REQUIRES_POP(bb_flag) ((bb_flag) & 0b1111)

extern _Py_CODEUNIT *_PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
PyAPI_FUNC(_Py_CODEUNIT *) _PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
_Py_CODEUNIT *);
extern _Py_CODEUNIT *_PyTier2_GenerateNextBB(
struct _PyInterpreterFrame *frame,
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ typedef enum {

typedef _PyJITReturnCode (*_PyJITFunction)(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, _Py_CODEUNIT *next_instr);

PyAPI_FUNC(_PyJITFunction)_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace);
PyAPI_FUNC(void)_PyJIT_Free(_PyJITFunction trace);
PyAPI_FUNC(_PyJITFunction)_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets);
PyAPI_FUNC(void)_PyJIT_Free(_PyJITFunction trace);
3 changes: 1 addition & 2 deletions Include/internal/pycore_opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,4 +532,7 @@ def pseudo_op(name, op, real_ops):
# Storing an unboxed value, overwriting an unboxed local.
'STORE_FAST_UNBOXED_UNBOXED',
# The traditional STORE_FAST is storing a boxed value, overwriting a boxed local.

# Tier 2.5
'EXIT_TRACE',
]
5 changes: 5 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3286,6 +3286,11 @@ dummy_func(
Py_UNREACHABLE();
}

// Special sentinel to indicate the end of a machine code trace.
// It is only a marker: its stack effect is empty (nothing popped, nothing
// pushed), and the interpreter must never actually dispatch to it, which is
// why the body is Py_UNREACHABLE().
inst(EXIT_TRACE, (--)) {
Py_UNREACHABLE();
}

// Tier 2 instructions
// Type propagator assumes this doesn't affect type context
inst(BB_BRANCH, (unused/10 --)) {
Expand Down
4 changes: 4 additions & 0 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 24 additions & 9 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ copy_and_patch(unsigned char *memory, const Stencil *stencil, uintptr_t patches[
// The world's smallest compiler?
// Make sure to call _PyJIT_Free on the memory when you're done with it!
_PyJITFunction
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets)
{
assert(size > 0);
if (!stencils_loaded) {
stencils_loaded = 1;
for (size_t i = 0; i < Py_ARRAY_LENGTH(stencils); i++) {
Expand All @@ -120,7 +121,8 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
return NULL;
}
// First, loop over everything once to find the total compiled size:
size_t nbytes = trampoline_stencil.nbytes;
// size_t nbytes = trampoline_stencil.nbytes;
size_t nbytes = 0;
for (int i = 0; i < size; i++) {
_Py_CODEUNIT *instruction = trace[i];
const Stencil *stencil = &stencils[instruction->op.code];
Expand All @@ -135,14 +137,26 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
}
unsigned char *head = memory;
uintptr_t patches[] = GET_PATCHES();
// First, the trampoline:
const Stencil *stencil = &trampoline_stencil;
patches[HOLE_base] = (uintptr_t)head;
patches[HOLE_continue] = (uintptr_t)head + stencil->nbytes;
copy_and_patch(head, stencil, patches);
head += stencil->nbytes;
//// First, the trampoline:
//const Stencil *stencil = &trampoline_stencil;
//patches[HOLE_base] = (uintptr_t)head;
//patches[HOLE_continue] = (uintptr_t)head + stencil->nbytes;
//copy_and_patch(head, stencil, patches);
//head += stencil->nbytes;
// Then, all of the stencils:
int seen_jump_targets = 0;
// Allocate memory for all of the entry point (trampoline) stencils, one per jump target:
unsigned char *entry_points = alloc(trampoline_stencil.nbytes * n_jump_targets);
for (int i = 0; i < size; i++) {
// For each jump target, create an entry trampoline.
if (i == jump_target_trace_offsets[seen_jump_targets]) {
seen_jump_targets++;
const Stencil *trampoline = &trampoline_stencil;
patches[HOLE_base] = (uintptr_t)entry_points;
patches[HOLE_continue] = (uintptr_t)head;
copy_and_patch(entry_points, trampoline, patches);
entry_points += trampoline->nbytes;
}
_Py_CODEUNIT *instruction = trace[i];
const Stencil *stencil = &stencils[instruction->op.code];
patches[HOLE_base] = (uintptr_t)head;
Expand All @@ -156,5 +170,6 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
};
// Wow, done already?
assert(memory + nbytes == head);
return (_PyJITFunction)memory;
assert(seen_jump_targets == n_jump_targets);
return (_PyJITFunction)entry_points;
}
5 changes: 5 additions & 0 deletions Python/opcode_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 0;
case CACHE:
return 0;
case EXIT_TRACE:
return 0;
case BB_BRANCH:
return 0;
case BB_BRANCH_IF_FLAG_UNSET:
Expand Down Expand Up @@ -817,6 +819,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case CACHE:
return 0;
case EXIT_TRACE:
return 0;
case BB_BRANCH:
return 0;
case BB_BRANCH_IF_FLAG_UNSET:
Expand Down Expand Up @@ -1041,6 +1045,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[SWAP] = { true, INSTR_FMT_IB },
[EXTENDED_ARG] = { true, INSTR_FMT_IB },
[CACHE] = { true, INSTR_FMT_IX },
[EXIT_TRACE] = { true, INSTR_FMT_IX },
[BB_BRANCH] = { true, INSTR_FMT_IBC000000000 },
[BB_BRANCH_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC000000000 },
[BB_JUMP_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC000000000 },
Expand Down
2 changes: 1 addition & 1 deletion Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 2d04e2e

Please sign in to comment.