Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 2 additions & 123 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern "C" {
#include "pycore_uop.h" // _PyUOpInstruction
#include "pycore_uop_ids.h"
#include "pycore_stackref.h" // _PyStackRef
#include "pycore_optimizer_types.h"
#include <stdbool.h>


Expand Down Expand Up @@ -84,7 +85,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
#define JIT_CLEANUP_THRESHOLD 1000

int _Py_uop_analyze_and_optimize(
PyFunctionObject *func,
_PyThreadStateImpl *tstate,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);

Expand Down Expand Up @@ -112,86 +113,6 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
return inst->error_target;
}

// Holds locals, stack, locals, stack ... co_consts (in that order)
#define MAX_ABSTRACT_INTERP_SIZE 4096

#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)

// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
#define MAX_ABSTRACT_FRAME_DEPTH (16)

// The maximum number of side exits that we can take before requiring forward
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
// is the "maximum amount of polymorphism" that an isolated trace tree can
// handle before rejoining the rest of the program.
#define MAX_CHAIN_DEPTH 4

/* Symbols */
/* See explanation in optimizer_symbols.c */


typedef enum _JitSymType {
JIT_SYM_UNKNOWN_TAG = 1,
JIT_SYM_NULL_TAG = 2,
JIT_SYM_NON_NULL_TAG = 3,
JIT_SYM_BOTTOM_TAG = 4,
JIT_SYM_TYPE_VERSION_TAG = 5,
JIT_SYM_KNOWN_CLASS_TAG = 6,
JIT_SYM_KNOWN_VALUE_TAG = 7,
JIT_SYM_TUPLE_TAG = 8,
JIT_SYM_TRUTHINESS_TAG = 9,
JIT_SYM_COMPACT_INT = 10,
} JitSymType;

typedef struct _jit_opt_known_class {
uint8_t tag;
uint32_t version;
PyTypeObject *type;
} JitOptKnownClass;

typedef struct _jit_opt_known_version {
uint8_t tag;
uint32_t version;
} JitOptKnownVersion;

typedef struct _jit_opt_known_value {
uint8_t tag;
PyObject *value;
} JitOptKnownValue;

#define MAX_SYMBOLIC_TUPLE_SIZE 7

typedef struct _jit_opt_tuple {
uint8_t tag;
uint8_t length;
uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
} JitOptTuple;

typedef struct {
uint8_t tag;
bool invert;
uint16_t value;
} JitOptTruthiness;

typedef struct {
uint8_t tag;
} JitOptCompactInt;

typedef union _jit_opt_symbol {
uint8_t tag;
JitOptKnownClass cls;
JitOptKnownValue value;
JitOptKnownVersion version;
JitOptTuple tuple;
JitOptTruthiness truthiness;
JitOptCompactInt compact;
} JitOptSymbol;


// This mimics the _PyStackRef API
typedef union {
uintptr_t bits;
} JitOptRef;

#define REF_IS_BORROWED 1

Expand Down Expand Up @@ -238,48 +159,6 @@ PyJitRef_IsBorrowed(JitOptRef ref)
return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
}

struct _Py_UOpsAbstractFrame {
bool globals_watched;
// The version number of the globals dicts, once checked. 0 if unchecked.
uint32_t globals_checked_version;
// Max stacklen
int stack_len;
int locals_len;
PyFunctionObject *func;
PyCodeObject *code;

JitOptRef *stack_pointer;
JitOptRef *stack;
JitOptRef *locals;
};

typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;

typedef struct ty_arena {
int ty_curr_number;
int ty_max_number;
JitOptSymbol arena[TY_ARENA_SIZE];
} ty_arena;

typedef struct _JitOptContext {
char done;
char out_of_space;
bool contradiction;
// Has the builtins dict been watched?
bool builtins_watched;
// The current "executing" frame.
_Py_UOpsAbstractFrame *frame;
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
int curr_frame_depth;

// Arena for the symbolic types.
ty_arena t_arena;

JitOptRef *n_consumed;
JitOptRef *limit;
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
} JitOptContext;

extern bool _Py_uop_sym_is_null(JitOptRef sym);
extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);
Expand Down
137 changes: 137 additions & 0 deletions Include/internal/pycore_optimizer_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#ifndef Py_INTERNAL_OPTIMIZER_TYPES_H
#define Py_INTERNAL_OPTIMIZER_TYPES_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_uop.h" // UOP_MAX_TRACE_LENGTH

// Holds locals, stack, locals, stack ... co_consts (in that order)
// Used as the element count of JitOptContext.locals_and_stack.
#define MAX_ABSTRACT_INTERP_SIZE 4096

// Number of JitOptSymbol slots in ty_arena.arena; scales with the maximum
// trace length (five slots per uop).
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)

// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
// Used as the element count of JitOptContext.frames.
#define MAX_ABSTRACT_FRAME_DEPTH (16)

// The maximum number of side exits that we can take before requiring forward
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
// is the "maximum amount of polymorphism" that an isolated trace tree can
// handle before rejoining the rest of the program.
#define MAX_CHAIN_DEPTH 4

/* Symbols */
/* See explanation in optimizer_symbols.c */


// Discriminator values for the JIT optimizer's symbols. Numbering starts at
// 1, so no enumerator has the value 0.
// NOTE(review): presumably these are the values stored in the `tag` member
// of JitOptSymbol and its variants -- confirm against optimizer_symbols.c,
// which also explains the meaning of each kind.
// NOTE(review): JIT_SYM_COMPACT_INT is the only enumerator without the
// `_TAG` suffix.
typedef enum _JitSymType {
JIT_SYM_UNKNOWN_TAG = 1,
JIT_SYM_NULL_TAG = 2,
JIT_SYM_NON_NULL_TAG = 3,
JIT_SYM_BOTTOM_TAG = 4,
JIT_SYM_TYPE_VERSION_TAG = 5,
JIT_SYM_KNOWN_CLASS_TAG = 6,
JIT_SYM_KNOWN_VALUE_TAG = 7,
JIT_SYM_TUPLE_TAG = 8,
JIT_SYM_TRUTHINESS_TAG = 9,
JIT_SYM_COMPACT_INT = 10,
} JitSymType;

// Symbol variant that records a type object pointer together with a 32-bit
// version number. It is the `cls` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_KNOWN_CLASS_TAG, with
// `version` being the type's version tag -- confirm.
typedef struct _jit_opt_known_class {
// Leading discriminator; every JitOptSymbol variant starts with this field.
uint8_t tag;
uint32_t version;
PyTypeObject *type;
} JitOptKnownClass;

// Symbol variant that records only a 32-bit version number (no type
// pointer). It is the `version` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_TYPE_VERSION_TAG -- confirm.
typedef struct _jit_opt_known_version {
// Leading discriminator; every JitOptSymbol variant starts with this field.
uint8_t tag;
uint32_t version;
} JitOptKnownVersion;

// Symbol variant that records a pointer to a specific Python object. It is
// the `value` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_KNOWN_VALUE_TAG; whether
// `value` is an owned or borrowed reference is not visible here -- confirm.
typedef struct _jit_opt_known_value {
// Leading discriminator; every JitOptSymbol variant starts with this field.
uint8_t tag;
PyObject *value;
} JitOptKnownValue;

// Capacity of JitOptTuple.items.
#define MAX_SYMBOLIC_TUPLE_SIZE 7

// Symbol variant describing a tuple with up to MAX_SYMBOLIC_TUPLE_SIZE
// tracked items. It is the `tuple` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_TUPLE_TAG -- confirm.
typedef struct _jit_opt_tuple {
// Leading discriminator; every JitOptSymbol variant starts with this field.
uint8_t tag;
// NOTE(review): presumably the number of valid entries in `items` -- confirm.
uint8_t length;
// Items are stored as 16-bit integers rather than pointers.
// NOTE(review): presumably indices of other symbols in the arena -- confirm.
uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
} JitOptTuple;

// Symbol variant holding a 16-bit `value` plus an `invert` flag. It is the
// `truthiness` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_TRUTHINESS_TAG, with `value`
// referring to the symbol whose truthiness is tracked and `invert` marking
// a negated test -- confirm against optimizer_symbols.c.
typedef struct {
// Leading discriminator; every JitOptSymbol variant starts with this field.
uint8_t tag;
bool invert;
uint16_t value;
} JitOptTruthiness;

// Symbol variant that carries no payload beyond the discriminator. It is
// the `compact` member of the JitOptSymbol union.
// NOTE(review): presumably paired with JIT_SYM_COMPACT_INT -- confirm.
typedef struct {
uint8_t tag;
} JitOptCompactInt;

// One symbol of the JIT optimizer, stored as a union of all variants.
// Every variant struct begins with a `uint8_t tag`, so the bare `tag`
// member overlays the discriminator of whichever variant is in use.
// ty_arena holds an array of these.
typedef union _jit_opt_symbol {
uint8_t tag;
JitOptKnownClass cls;
JitOptKnownValue value;
JitOptKnownVersion version;
JitOptTuple tuple;
JitOptTruthiness truthiness;
JitOptCompactInt compact;
} JitOptSymbol;

// This mimics the _PyStackRef API
// A single pointer-sized word wrapped in a union. The abstract frames'
// locals and stack are arrays of these.
// NOTE(review): pycore_optimizer.h tests the low bit of `bits` against
// REF_IS_BORROWED; presumably the remaining bits hold a JitOptSymbol
// pointer -- confirm.
typedef union {
uintptr_t bits;
} JitOptRef;

// The optimizer's abstract view of one frame: the function and code object
// it belongs to, globals-watching state, and JitOptRef locals and stack.
// JitOptContext embeds an array of these (`frames`) and points at the
// current one (`frame`).
typedef struct _Py_UOpsAbstractFrame {
bool globals_watched;
// The version number of the globals dicts, once checked. 0 if unchecked.
uint32_t globals_checked_version;
// Max stacklen
int stack_len;
int locals_len;
PyFunctionObject *func;
PyCodeObject *code;

// NOTE(review): presumably these three point into
// JitOptContext.locals_and_stack, with `stack_pointer` tracking the current
// top of `stack` -- confirm.
JitOptRef *stack_pointer;
JitOptRef *stack;
JitOptRef *locals;
} _Py_UOpsAbstractFrame;

// Fixed-size storage for JitOptSymbol objects, embedded by value in
// JitOptContext as `t_arena`.
typedef struct ty_arena {
// NOTE(review): presumably the index of the next free slot in `arena` --
// confirm.
int ty_curr_number;
// NOTE(review): presumably the capacity limit checked before handing out a
// slot (at most TY_ARENA_SIZE) -- confirm.
int ty_max_number;
JitOptSymbol arena[TY_ARENA_SIZE];
} ty_arena;

// Working state of the uop optimizer: the abstract frame stack, the symbol
// arena, and the backing storage for abstract locals and stacks.
// The struct is large because `frames`, `t_arena` and `locals_and_stack`
// are all embedded by value.
typedef struct _JitOptContext {
// NOTE(review): `done` and `out_of_space` are declared `char` but look like
// boolean status flags -- confirm how the optimizer sets and reads them.
char done;
char out_of_space;
bool contradiction;
// Has the builtins dict been watched?
bool builtins_watched;
// The current "executing" frame.
_Py_UOpsAbstractFrame *frame;
// Storage for up to MAX_ABSTRACT_FRAME_DEPTH abstract frames.
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
// NOTE(review): presumably the number of entries of `frames` in use --
// confirm.
int curr_frame_depth;

// Arena for the symbolic types.
ty_arena t_arena;

// NOTE(review): presumably `n_consumed` marks how much of `locals_and_stack`
// has been handed out and `limit` marks its end -- confirm.
JitOptRef *n_consumed;
JitOptRef *limit;
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
} JitOptContext;


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_OPTIMIZER_TYPES_H */
4 changes: 3 additions & 1 deletion Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern "C" {
#include "pycore_freelist_state.h" // struct _Py_freelists
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
#include "pycore_optimizer_types.h" // JitOptContext
#include "pycore_qsbr.h" // struct qsbr
#include "pycore_uop.h" // struct _PyUOpInstruction
#include "pycore_structs.h"
Expand Down Expand Up @@ -52,10 +53,11 @@ typedef struct _PyJitTracerTranslatorState {
} _PyJitTracerTranslatorState;

typedef struct _PyJitTracerState {
_PyUOpInstruction *code_buffer;
_PyJitTracerInitialState initial_state;
_PyJitTracerPreviousState prev_state;
_PyJitTracerTranslatorState translator_state;
JitOptContext opt_context;
_PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
} _PyJitTracerState;

#endif
Expand Down
11 changes: 2 additions & 9 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,6 @@ _PyJit_TryInitializeTracing(
if (oparg > 0xFFFF) {
return 0;
}
if (_tstate->jit_tracer_state.code_buffer == NULL) {
_tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
if (_tstate->jit_tracer_state.code_buffer == NULL) {
// Don't error, just go to next instruction.
return 0;
}
}
PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
if (func == NULL) {
return 0;
Expand Down Expand Up @@ -1484,8 +1477,8 @@ uop_optimize(
OPT_STAT_INC(traces_created);
if (!is_noopt) {
length = _Py_uop_analyze_and_optimize(
_tstate->jit_tracer_state.initial_state.func,
buffer,length,
_tstate,
buffer, length,
curr_stackentries, dependencies);
if (length <= 0) {
return length;
Expand Down
11 changes: 6 additions & 5 deletions Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "pycore_opcode_metadata.h"
#include "pycore_opcode_utils.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_uop_metadata.h"
#include "pycore_long.h"
#include "pycore_interpframe.h" // _PyFrame_GetCode
Expand Down Expand Up @@ -334,17 +335,17 @@ _Py_opt_assert_within_stack_bounds(
/* >0 (length) for success, 0 for not ready, clears all possible errors. */
static int
optimize_uops(
PyFunctionObject *func,
_PyThreadStateImpl *tstate,
_PyUOpInstruction *trace,
int trace_len,
int curr_stacklen,
_PyBloomFilter *dependencies
)
{
assert(!PyErr_Occurred());
PyFunctionObject *func = tstate->jit_tracer_state.initial_state.func;

JitOptContext context;
JitOptContext *ctx = &context;
JitOptContext *ctx = &tstate->jit_tracer_state.opt_context;
uint32_t opcode = UINT16_MAX;

// Make sure that watchers are set up
Expand Down Expand Up @@ -574,7 +575,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
// > 0 - length of optimized trace
int
_Py_uop_analyze_and_optimize(
PyFunctionObject *func,
_PyThreadStateImpl *tstate,
_PyUOpInstruction *buffer,
int length,
int curr_stacklen,
Expand All @@ -584,7 +585,7 @@ _Py_uop_analyze_and_optimize(
OPT_STAT_INC(optimizer_attempts);

length = optimize_uops(
func, buffer,
tstate, buffer,
length, curr_stacklen, dependencies);

if (length == 0) {
Expand Down
Loading
Loading