Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 1 | // Protocol Buffers - Google's data interchange format |
| 2 | // Copyright 2023 Google LLC. All rights reserved. |
Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 3 | // |
Protobuf Team Bot | 0fab773 | 2023-11-20 13:38:15 -0800 | [diff] [blame] | 4 | // Use of this source code is governed by a BSD-style |
| 5 | // license that can be found in the LICENSE file or at |
| 6 | // https://developers.google.com/open-source/licenses/bsd |
Adam Cozzette | 501ecec | 2023-09-26 14:36:20 -0700 | [diff] [blame] | 7 | |
| 8 | #include "python/protobuf.h" |
| 9 | |
| 10 | #include "python/descriptor.h" |
| 11 | #include "python/descriptor_containers.h" |
| 12 | #include "python/descriptor_pool.h" |
| 13 | #include "python/extension_dict.h" |
| 14 | #include "python/map.h" |
| 15 | #include "python/message.h" |
| 16 | #include "python/repeated.h" |
| 17 | #include "python/unknown_fields.h" |
| 18 | |
| 19 | static upb_Arena* PyUpb_NewArena(void); |
| 20 | |
| 21 | static void PyUpb_ModuleDealloc(void* module) { |
| 22 | PyUpb_ModuleState* s = PyModule_GetState(module); |
| 23 | PyUpb_WeakMap_Free(s->obj_cache); |
| 24 | if (s->c_descriptor_symtab) { |
| 25 | upb_DefPool_Free(s->c_descriptor_symtab); |
| 26 | } |
| 27 | } |
| 28 | |
| 29 | PyObject* PyUpb_SetAllowOversizeProtos(PyObject* m, PyObject* arg) { |
| 30 | if (!arg || !PyBool_Check(arg)) { |
| 31 | PyErr_SetString(PyExc_TypeError, |
| 32 | "Argument to SetAllowOversizeProtos must be boolean"); |
| 33 | return NULL; |
| 34 | } |
| 35 | PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); |
| 36 | state->allow_oversize_protos = PyObject_IsTrue(arg); |
| 37 | Py_INCREF(arg); |
| 38 | return arg; |
| 39 | } |
| 40 | |
| 41 | static PyMethodDef PyUpb_ModuleMethods[] = { |
| 42 | {"SetAllowOversizeProtos", PyUpb_SetAllowOversizeProtos, METH_O, |
| 43 | "Enable/disable oversize proto parsing."}, |
| 44 | {NULL, NULL}}; |
| 45 | |
| 46 | static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT, |
| 47 | PYUPB_MODULE_NAME, |
| 48 | "Protobuf Module", |
| 49 | sizeof(PyUpb_ModuleState), |
| 50 | PyUpb_ModuleMethods, // m_methods |
| 51 | NULL, // m_slots |
| 52 | NULL, // m_traverse |
| 53 | NULL, // m_clear |
| 54 | PyUpb_ModuleDealloc}; |
| 55 | |
| 56 | // ----------------------------------------------------------------------------- |
| 57 | // ModuleState |
| 58 | // ----------------------------------------------------------------------------- |
| 59 | |
| 60 | PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void) { |
| 61 | PyObject* module = PyState_FindModule(&module_def); |
| 62 | return module ? PyModule_GetState(module) : NULL; |
| 63 | } |
| 64 | |
| 65 | PyUpb_ModuleState* PyUpb_ModuleState_GetFromModule(PyObject* module) { |
| 66 | PyUpb_ModuleState* state = PyModule_GetState(module); |
| 67 | assert(state); |
| 68 | assert(PyModule_GetDef(module) == &module_def); |
| 69 | return state; |
| 70 | } |
| 71 | |
| 72 | PyUpb_ModuleState* PyUpb_ModuleState_Get(void) { |
| 73 | PyObject* module = PyState_FindModule(&module_def); |
| 74 | assert(module); |
| 75 | return PyUpb_ModuleState_GetFromModule(module); |
| 76 | } |
| 77 | |
| 78 | PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) { |
| 79 | if (!state->wkt_bases) { |
| 80 | PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE |
| 81 | ".well_known_types"); |
| 82 | |
| 83 | if (wkt_module == NULL) { |
| 84 | return false; |
| 85 | } |
| 86 | |
| 87 | state->wkt_bases = PyObject_GetAttrString(wkt_module, "WKTBASES"); |
| 88 | PyObject* m = PyState_FindModule(&module_def); |
| 89 | // Reparent ownership to m. |
| 90 | PyModule_AddObject(m, "__internal_wktbases", state->wkt_bases); |
| 91 | Py_DECREF(wkt_module); |
| 92 | } |
| 93 | |
| 94 | return state->wkt_bases; |
| 95 | } |
| 96 | |
| 97 | // ----------------------------------------------------------------------------- |
| 98 | // WeakMap |
| 99 | // ----------------------------------------------------------------------------- |
| 100 | |
| 101 | struct PyUpb_WeakMap { |
| 102 | upb_inttable table; |
| 103 | upb_Arena* arena; |
| 104 | }; |
| 105 | |
| 106 | PyUpb_WeakMap* PyUpb_WeakMap_New(void) { |
| 107 | upb_Arena* arena = PyUpb_NewArena(); |
| 108 | PyUpb_WeakMap* map = upb_Arena_Malloc(arena, sizeof(*map)); |
| 109 | map->arena = arena; |
| 110 | upb_inttable_init(&map->table, map->arena); |
| 111 | return map; |
| 112 | } |
| 113 | |
| 114 | void PyUpb_WeakMap_Free(PyUpb_WeakMap* map) { upb_Arena_Free(map->arena); } |
| 115 | |
// Number of low pointer bits that are always zero and are therefore shifted
// away to give better entropy in the table key.
static const int PyUpb_PtrShift = (sizeof(void*) == 4) ? 2 : 3;

// Converts a pointer into the integer key used by the weak map's table.
// The pointer must be aligned to (1 << PyUpb_PtrShift) bytes; this is checked
// by an assert in debug builds.
uintptr_t PyUpb_WeakMap_GetKey(const void* key) {
  const uintptr_t addr = (uintptr_t)key;
  assert(addr % ((uintptr_t)1 << PyUpb_PtrShift) == 0);
  return addr >> PyUpb_PtrShift;
}
| 125 | |
| 126 | void PyUpb_WeakMap_Add(PyUpb_WeakMap* map, const void* key, PyObject* py_obj) { |
| 127 | upb_inttable_insert(&map->table, PyUpb_WeakMap_GetKey(key), |
| 128 | upb_value_ptr(py_obj), map->arena); |
| 129 | } |
| 130 | |
| 131 | void PyUpb_WeakMap_Delete(PyUpb_WeakMap* map, const void* key) { |
| 132 | upb_value val; |
| 133 | bool removed = |
| 134 | upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), &val); |
| 135 | (void)removed; |
| 136 | assert(removed); |
| 137 | } |
| 138 | |
| 139 | void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap* map, const void* key) { |
| 140 | upb_inttable_remove(&map->table, PyUpb_WeakMap_GetKey(key), NULL); |
| 141 | } |
| 142 | |
| 143 | PyObject* PyUpb_WeakMap_Get(PyUpb_WeakMap* map, const void* key) { |
| 144 | upb_value val; |
| 145 | if (upb_inttable_lookup(&map->table, PyUpb_WeakMap_GetKey(key), &val)) { |
| 146 | PyObject* ret = upb_value_getptr(val); |
| 147 | Py_INCREF(ret); |
| 148 | return ret; |
| 149 | } else { |
| 150 | return NULL; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | bool PyUpb_WeakMap_Next(PyUpb_WeakMap* map, const void** key, PyObject** obj, |
| 155 | intptr_t* iter) { |
| 156 | uintptr_t u_key; |
| 157 | upb_value val; |
| 158 | if (!upb_inttable_next(&map->table, &u_key, &val, iter)) return false; |
| 159 | *key = (void*)(u_key << PyUpb_PtrShift); |
| 160 | *obj = upb_value_getptr(val); |
| 161 | return true; |
| 162 | } |
| 163 | |
| 164 | void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap* map, intptr_t* iter) { |
| 165 | upb_inttable_removeiter(&map->table, iter); |
| 166 | } |
| 167 | |
| 168 | // ----------------------------------------------------------------------------- |
| 169 | // ObjCache |
| 170 | // ----------------------------------------------------------------------------- |
| 171 | |
| 172 | PyUpb_WeakMap* PyUpb_ObjCache_Instance(void) { |
| 173 | PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); |
| 174 | return state->obj_cache; |
| 175 | } |
| 176 | |
| 177 | void PyUpb_ObjCache_Add(const void* key, PyObject* py_obj) { |
| 178 | PyUpb_WeakMap_Add(PyUpb_ObjCache_Instance(), key, py_obj); |
| 179 | } |
| 180 | |
| 181 | void PyUpb_ObjCache_Delete(const void* key) { |
| 182 | PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet(); |
| 183 | if (!state) { |
| 184 | // During the shutdown sequence, our object's Dealloc() methods can be |
| 185 | // called *after* our module Dealloc() method has been called. At that |
| 186 | // point our state will be NULL and there is nothing to delete out of the |
| 187 | // map. |
| 188 | return; |
| 189 | } |
| 190 | PyUpb_WeakMap_Delete(state->obj_cache, key); |
| 191 | } |
| 192 | |
| 193 | PyObject* PyUpb_ObjCache_Get(const void* key) { |
| 194 | return PyUpb_WeakMap_Get(PyUpb_ObjCache_Instance(), key); |
| 195 | } |
| 196 | |
| 197 | // ----------------------------------------------------------------------------- |
| 198 | // Arena |
| 199 | // ----------------------------------------------------------------------------- |
| 200 | |
| 201 | typedef struct { |
| 202 | PyObject_HEAD; |
| 203 | upb_Arena* arena; |
| 204 | } PyUpb_Arena; |
| 205 | |
| 206 | // begin:google_only |
| 207 | // static upb_alloc* global_alloc = &upb_alloc_global; |
| 208 | // end:google_only |
| 209 | |
| 210 | // begin:github_only |
| 211 | #ifdef __GLIBC__ |
| 212 | #include <malloc.h> // malloc_trim() |
| 213 | #endif |
| 214 | |
| 215 | // A special allocator that calls malloc_trim() periodically to release |
| 216 | // memory to the OS. Without this call, we appear to leak memory, at least |
| 217 | // as measured in RSS. |
| 218 | // |
| 219 | // We opt not to use this instead of PyMalloc (which would also solve the |
| 220 | // problem) because the latter requires the GIL to be held. This would make |
| 221 | // our messages unsafe to share with other languages that could free at |
| 222 | // unpredictable |
| 223 | // times. |
| 224 | static void* upb_trim_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize, |
| 225 | size_t size) { |
| 226 | (void)alloc; |
| 227 | (void)oldsize; |
| 228 | if (size == 0) { |
| 229 | free(ptr); |
| 230 | #ifdef __GLIBC__ |
| 231 | static int count = 0; |
| 232 | if (++count == 10000) { |
| 233 | malloc_trim(0); |
| 234 | count = 0; |
| 235 | } |
| 236 | #endif |
| 237 | return NULL; |
| 238 | } else { |
| 239 | return realloc(ptr, size); |
| 240 | } |
| 241 | } |
| 242 | static upb_alloc trim_alloc = {&upb_trim_allocfunc}; |
| 243 | static const upb_alloc* global_alloc = &trim_alloc; |
| 244 | // end:github_only |
| 245 | |
| 246 | static upb_Arena* PyUpb_NewArena(void) { |
| 247 | return upb_Arena_Init(NULL, 0, global_alloc); |
| 248 | } |
| 249 | |
| 250 | PyObject* PyUpb_Arena_New(void) { |
| 251 | PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); |
| 252 | PyUpb_Arena* arena = (void*)PyType_GenericAlloc(state->arena_type, 0); |
| 253 | arena->arena = PyUpb_NewArena(); |
| 254 | return &arena->ob_base; |
| 255 | } |
| 256 | |
| 257 | static void PyUpb_Arena_Dealloc(PyObject* self) { |
| 258 | upb_Arena_Free(PyUpb_Arena_Get(self)); |
| 259 | PyUpb_Dealloc(self); |
| 260 | } |
| 261 | |
| 262 | upb_Arena* PyUpb_Arena_Get(PyObject* arena) { |
| 263 | return ((PyUpb_Arena*)arena)->arena; |
| 264 | } |
| 265 | |
| 266 | static PyType_Slot PyUpb_Arena_Slots[] = { |
| 267 | {Py_tp_dealloc, PyUpb_Arena_Dealloc}, |
| 268 | {0, NULL}, |
| 269 | }; |
| 270 | |
| 271 | static PyType_Spec PyUpb_Arena_Spec = { |
| 272 | PYUPB_MODULE_NAME ".Arena", |
| 273 | sizeof(PyUpb_Arena), |
| 274 | 0, // itemsize |
| 275 | Py_TPFLAGS_DEFAULT, |
| 276 | PyUpb_Arena_Slots, |
| 277 | }; |
| 278 | |
| 279 | static bool PyUpb_InitArena(PyObject* m) { |
| 280 | PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m); |
| 281 | state->arena_type = PyUpb_AddClass(m, &PyUpb_Arena_Spec); |
| 282 | return state->arena_type; |
| 283 | } |
| 284 | |
| 285 | // ----------------------------------------------------------------------------- |
| 286 | // Utilities |
| 287 | // ----------------------------------------------------------------------------- |
| 288 | |
| 289 | PyTypeObject* AddObject(PyObject* m, const char* name, PyType_Spec* spec) { |
| 290 | PyObject* type = PyType_FromSpec(spec); |
| 291 | return type && PyModule_AddObject(m, name, type) == 0 ? (PyTypeObject*)type |
| 292 | : NULL; |
| 293 | } |
| 294 | |
| 295 | static const char* PyUpb_GetClassName(PyType_Spec* spec) { |
| 296 | // spec->name contains a fully-qualified name, like: |
| 297 | // google.protobuf.pyext._message.FooBar |
| 298 | // |
| 299 | // Find the rightmost '.' to get "FooBar". |
| 300 | const char* name = strrchr(spec->name, '.'); |
| 301 | assert(name); |
| 302 | return name + 1; |
| 303 | } |
| 304 | |
| 305 | PyTypeObject* PyUpb_AddClass(PyObject* m, PyType_Spec* spec) { |
| 306 | PyObject* type = PyType_FromSpec(spec); |
| 307 | const char* name = PyUpb_GetClassName(spec); |
| 308 | if (PyModule_AddObject(m, name, type) < 0) { |
| 309 | Py_XDECREF(type); |
| 310 | return NULL; |
| 311 | } |
| 312 | return (PyTypeObject*)type; |
| 313 | } |
| 314 | |
| 315 | PyTypeObject* PyUpb_AddClassWithBases(PyObject* m, PyType_Spec* spec, |
| 316 | PyObject* bases) { |
| 317 | PyObject* type = PyType_FromSpecWithBases(spec, bases); |
| 318 | const char* name = PyUpb_GetClassName(spec); |
| 319 | if (PyModule_AddObject(m, name, type) < 0) { |
| 320 | Py_XDECREF(type); |
| 321 | return NULL; |
| 322 | } |
| 323 | return (PyTypeObject*)type; |
| 324 | } |
| 325 | |
| 326 | const char* PyUpb_GetStrData(PyObject* obj) { |
| 327 | if (PyUnicode_Check(obj)) { |
| 328 | return PyUnicode_AsUTF8AndSize(obj, NULL); |
| 329 | } else if (PyBytes_Check(obj)) { |
| 330 | return PyBytes_AsString(obj); |
| 331 | } else { |
| 332 | return NULL; |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | const char* PyUpb_VerifyStrData(PyObject* obj) { |
| 337 | const char* ret = PyUpb_GetStrData(obj); |
| 338 | if (ret) return ret; |
| 339 | PyErr_Format(PyExc_TypeError, "Expected string: %S", obj); |
| 340 | return NULL; |
| 341 | } |
| 342 | |
| 343 | PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds) { |
| 344 | PyObject* name = PyObject_GetAttrString(cls, "__name__"); |
| 345 | PyErr_Format(PyExc_RuntimeError, |
| 346 | "Objects of type %U may not be created directly.", name); |
| 347 | Py_XDECREF(name); |
| 348 | return NULL; |
| 349 | } |
| 350 | |
| 351 | bool PyUpb_IndexToRange(PyObject* index, Py_ssize_t size, Py_ssize_t* i, |
| 352 | Py_ssize_t* count, Py_ssize_t* step) { |
| 353 | assert(i && count && step); |
| 354 | if (PySlice_Check(index)) { |
| 355 | Py_ssize_t start, stop; |
| 356 | if (PySlice_Unpack(index, &start, &stop, step) < 0) return false; |
| 357 | *count = PySlice_AdjustIndices(size, &start, &stop, *step); |
| 358 | *i = start; |
| 359 | } else { |
| 360 | *i = PyNumber_AsSsize_t(index, PyExc_IndexError); |
| 361 | |
| 362 | if (*i == -1 && PyErr_Occurred()) { |
| 363 | PyErr_SetString(PyExc_TypeError, "list indices must be integers"); |
| 364 | return false; |
| 365 | } |
| 366 | |
| 367 | if (*i < 0) *i += size; |
| 368 | *step = 0; |
| 369 | *count = 1; |
| 370 | |
| 371 | if (*i < 0 || size <= *i) { |
| 372 | PyErr_Format(PyExc_IndexError, "list index out of range"); |
| 373 | return false; |
| 374 | } |
| 375 | } |
| 376 | return true; |
| 377 | } |
| 378 | |
| 379 | // ----------------------------------------------------------------------------- |
| 380 | // Module Entry Point |
| 381 | // ----------------------------------------------------------------------------- |
| 382 | |
| 383 | __attribute__((visibility("default"))) PyMODINIT_FUNC PyInit__message(void) { |
| 384 | PyObject* m = PyModule_Create(&module_def); |
| 385 | if (!m) return NULL; |
| 386 | |
| 387 | PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m); |
| 388 | |
| 389 | state->allow_oversize_protos = false; |
| 390 | state->wkt_bases = NULL; |
| 391 | state->obj_cache = PyUpb_WeakMap_New(); |
| 392 | state->c_descriptor_symtab = NULL; |
| 393 | |
| 394 | if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) || |
| 395 | !PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m) || |
| 396 | !PyUpb_InitExtensionDict(m) || !PyUpb_Map_Init(m) || |
| 397 | !PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m) || |
| 398 | !PyUpb_UnknownFields_Init(m)) { |
| 399 | Py_DECREF(m); |
| 400 | return NULL; |
| 401 | } |
| 402 | |
| 403 | // Temporary: an cookie we can use in the tests to ensure we are testing upb |
| 404 | // and not another protobuf library on the system. |
| 405 | PyModule_AddIntConstant(m, "_IS_UPB", 1); |
| 406 | |
| 407 | return m; |
| 408 | } |