Skip to content

Commit

Permalink
Use const char* for JSON key name (#60721)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored Jan 16, 2025
1 parent a15a4b5 commit fb6c4e3
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 82 deletions.
4 changes: 2 additions & 2 deletions pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen);
typedef const char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen);
typedef void *(*JSPFN_MALLOC)(size_t size);
typedef void (*JSPFN_FREE)(void *pptr);
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ Perhaps implement recursion detection */
void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
size_t cbName) {
const char *value;
char *objName;
const char *objName;
int count;
JSOBJ iterObj;
size_t szlen;
Expand Down
135 changes: 56 additions & 79 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ Numeric decoder derived from TCL library

npy_int64 get_nat(void) { return NPY_MIN_INT64; }

typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
size_t *_outLen);
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
size_t *_outLen);

int object_is_decimal_type(PyObject *obj);
int object_is_dataframe_type(PyObject *obj);
Expand Down Expand Up @@ -106,7 +106,7 @@ typedef struct __TypeContext {
double doubleValue;
JSINT64 longValue;

char *cStr;
const char *cStr;
NpyArrContext *npyarr;
PdBlockContext *pdblock;
int transpose;
Expand Down Expand Up @@ -301,14 +301,15 @@ static npy_float64 total_seconds(PyObject *td) {
return double_val;
}

static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
size_t *_outLen) {
static const char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
size_t *_outLen) {
PyObject *obj = (PyObject *)_obj;
*_outLen = PyBytes_GET_SIZE(obj);
return PyBytes_AS_STRING(obj);
}

static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
static const char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
size_t *_outLen) {
char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
if (encoded == NULL) {
/* Something went wrong.
Expand All @@ -321,24 +322,24 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) {
}

/* JSON callback. Returns a const char* and stores the string length in *len */
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
static const char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
return GET_TC(tc)->cStr;
}

/* JSON callback. Returns a const char* and stores the string length in *len */
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
static const char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len);
return GET_TC(tc)->cStr;
}

/* JSON callback */
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected date or datetime object");
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
Expand All @@ -349,7 +350,8 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
return PyDateTimeToIso(obj, base, len);
}

static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
size_t *outLen) {
PyObject *obj = (PyObject *)_obj;
PyObject *str = PyObject_CallMethod(obj, "isoformat", NULL);
if (str == NULL) {
Expand All @@ -373,8 +375,8 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
return outValue;
}

static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
size_t *len) {
static const char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
size_t *len) {
PyObject *obj = (PyObject *)_obj;
PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1);
PyObject *str = PyObject_Format(obj, format_spec);
Expand Down Expand Up @@ -558,10 +560,10 @@ static JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
NpyArrContext *npyarr = GET_TC(tc)->npyarr;
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
Expand Down Expand Up @@ -609,11 +611,11 @@ static int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
return NpyArr_iterNextItem(obj, tc);
}

static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc, size_t *outLen) {
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
NpyArrContext *npyarr = blkCtxt->npyCtxts[0];
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) {
const npy_intp idx = blkCtxt->colIdx - 1;
Expand All @@ -631,12 +633,12 @@ static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
return cStr;
}

static char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc,
size_t *outLen) {
static const char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc,
size_t *outLen) {
PdBlockContext *blkCtxt = GET_TC(tc)->pdblock;
NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx];
char *cStr;
const char *cStr;

if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) {
const npy_intp idx = npyarr->index[npyarr->stridedim] - 1;
Expand Down Expand Up @@ -817,9 +819,9 @@ static JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand Down Expand Up @@ -864,9 +866,9 @@ static JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *Set_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand Down Expand Up @@ -962,8 +964,8 @@ static JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}
Expand Down Expand Up @@ -994,9 +996,9 @@ static JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *Py_UNUSED(tc),
size_t *Py_UNUSED(outLen)) {
return NULL;
}

Expand All @@ -1005,24 +1007,16 @@ static char *List_iterGetName(JSOBJ Py_UNUSED(obj),
//=============================================================================
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "name", 5);
GET_TC(tc)->cStr = "name";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand All @@ -1042,8 +1036,8 @@ static JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand All @@ -1054,28 +1048,20 @@ static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
enc->outputFormat = VALUES; // for contained series
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "name", 5);
GET_TC(tc)->cStr = "name";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "index", 6);
GET_TC(tc)->cStr = "index";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand All @@ -1097,8 +1083,8 @@ static JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand All @@ -1109,28 +1095,20 @@ static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
enc->outputFormat = VALUES; // for contained series & index
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
if (!GET_TC(tc)->cStr) {
return 0;
}

const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (index == 0) {
memcpy(GET_TC(tc)->cStr, "columns", 8);
GET_TC(tc)->cStr = "columns";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
} else if (index == 1) {
memcpy(GET_TC(tc)->cStr, "index", 6);
GET_TC(tc)->cStr = "index";
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
memcpy(GET_TC(tc)->cStr, "data", 5);
GET_TC(tc)->cStr = "data";
Py_INCREF(obj);
GET_TC(tc)->itemValue = obj;
} else {
Expand All @@ -1150,8 +1128,8 @@ static JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj),
JSONTypeContext *tc, size_t *outLen) {
*outLen = strlen(GET_TC(tc)->cStr);
return GET_TC(tc)->cStr;
}
Expand Down Expand Up @@ -1201,8 +1179,8 @@ static JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
return GET_TC(tc)->itemValue;
}

static char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
static const char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
size_t *outLen) {
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}
Expand Down Expand Up @@ -1902,7 +1880,6 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->rowLabels = NULL;
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
GET_TC(tc)->columnLabels = NULL;
PyObject_Free(GET_TC(tc)->cStr);
GET_TC(tc)->cStr = NULL;
PyObject_Free(tc->prv);
tc->prv = NULL;
Expand Down Expand Up @@ -1953,8 +1930,8 @@ static JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {
return GET_TC(tc)->iterGetValue(obj, tc);
}

static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen) {
static const char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc,
size_t *outLen) {
return GET_TC(tc)->iterGetName(obj, tc, outLen);
}

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/json/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_read_zipped_json(datapath):

@td.skip_if_not_us_locale
@pytest.mark.single_cpu
@pytest.mark.network
def test_with_s3_url(compression, s3_public_bucket, s3so):
# Bucket created in tests/io/conftest.py
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,7 @@ def test_read_inline_jsonl(self):
tm.assert_frame_equal(result, expected)

@pytest.mark.single_cpu
@pytest.mark.network
@td.skip_if_not_us_locale
def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
# GH17200
Expand Down Expand Up @@ -2011,6 +2012,7 @@ def test_json_multiindex(self):
assert result == expected

@pytest.mark.single_cpu
@pytest.mark.network
def test_to_s3(self, s3_public_bucket, s3so):
# GH 28375
mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
Expand Down

0 comments on commit fb6c4e3

Please sign in to comment.