/*
 * Custom subclass of PyUnicode_Type, to allow for transparent bridging of
 * strings
 */

#include "pyobjc.h"

#include <stddef.h>
#include <Foundation/NSString.h>

/*
 * Instance layout: a regular unicode object followed by the bridge-specific
 * fields.
 */
typedef struct {
    PyUnicodeObject base;
    PyObject*       weakrefs; /* weak reference list (see tp_weaklistoffset) */
    id              nsstr;    /* the NSString this value was created from; retained with CFRetain */
    PyObject*       py_nsstr; /* lazily created Python proxy for nsstr, or NULL */
} PyObjCUnicodeObject;

PyDoc_STRVAR(class_doc,
    "objc.pyobjc_unicode\n"
    "\n"
    "Subclass of unicode for representing NSString values. Use \n"
    "the method nsstring to access the NSString. \n"
    "Note that instances are immutable and won't be updated when\n"
    "the value of the NSString changes."
);

/*
 * tp_dealloc: drop the bridge-specific state and then delegate to the
 * deallocator of the unicode base type.
 *
 * 'nsstr' can be nil when PyObjCUnicode_New fails halfway through
 * construction; in that case there is nothing to unregister or release.
 */
static void
class_dealloc(PyObject* obj)
{
    PyObjCUnicodeObject* uobj = (PyObjCUnicodeObject*)obj;
    id nsstr = uobj->nsstr;
    PyObject* weakrefs = uobj->weakrefs;
    PyObject* py_nsstr = uobj->py_nsstr;

    if (nsstr) {
        PyObjC_UnregisterPythonProxy(nsstr, obj);
    }

    Py_XDECREF(py_nsstr);
    if (nsstr) {
        CFRelease(nsstr);
    }

    if (weakrefs) {
        PyObject_ClearWeakRefs(obj);
    }

    PyUnicode_Type.tp_dealloc(obj);
}

/*
 * Return a new reference to the Python proxy for the wrapped NSString.
 *
 * The proxy is created on first use and cached in 'py_nsstr'. Returns NULL
 * (with the exception set by PyObjCObject_New) when the proxy cannot be
 * created.
 */
static PyObject*
meth_nsstring(PyObject* self)
{
    PyObjCUnicodeObject* uobj = (PyObjCUnicodeObject*)self;

    if (uobj->py_nsstr == NULL) {
        uobj->py_nsstr = PyObjCObject_New(uobj->nsstr,
                PyObjCObject_kDEFAULT, YES);
        if (uobj->py_nsstr == NULL) {
            /* Don't Py_INCREF a NULL pointer, just propagate the error */
            return NULL;
        }
    }
    Py_INCREF(uobj->py_nsstr);
    return uobj->py_nsstr;
}

/*
 * tp_getattro: look up the attribute on the unicode object first and fall
 * back to the NSString proxy when the attribute does not exist there.
 *
 * Only an AttributeError triggers the fallback, any other exception raised
 * during the generic lookup is propagated unchanged.
 */
static PyObject*
meth_getattro(PyObject* o, PyObject* attr_name)
{
    PyObject* res;
    PyObject* py_nsstr;

    res = PyObject_GenericGetAttr(o, attr_name);
    if (res != NULL) {
        return res;
    }

    if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
        return NULL;
    }
    PyErr_Clear();

    py_nsstr = meth_nsstring(o);
    if (py_nsstr == NULL) {
        return NULL;
    }

    res = PyObject_GetAttr(py_nsstr, attr_name);
    Py_DECREF(py_nsstr);
    return res;
}

/*
 * __reduce__: pickle the value as a plain unicode object, the result is
 * the tuple (PyUnicode_Type, (plain_copy_of_self,)).
 */
static PyObject*
meth_reduce(PyObject* self)
{
    PyObject* retVal = NULL;
    PyObject* v = NULL;
    PyObject* v2 = NULL;

    retVal = PyTuple_New(2);
    if (retVal == NULL) goto error;

    v = (PyObject*)&PyUnicode_Type;
    Py_INCREF(v);
    PyTuple_SET_ITEM(retVal, 0, v);

    v = PyUnicode_FromObject(self);
    if (v == NULL) goto error;

    v2 = PyTuple_New(1);
    if (v2 == NULL) goto error;

    /* Both SET_ITEM calls steal the reference to their argument */
    PyTuple_SET_ITEM(v2, 0, v);
    PyTuple_SET_ITEM(retVal, 1, v2);

    return retVal;

error:
    Py_XDECREF(retVal);
    Py_XDECREF(v);
    return NULL;
}

static PyMethodDef class_methods[] = {
    {
        "nsstring",
        (PyCFunction)meth_nsstring,
        METH_NOARGS,
        "directly access NSString instance"
    },
    {
        "__reduce__",
        (PyCFunction)meth_reduce,
        METH_NOARGS,
        "Used for pickling"
    },
    { 0, 0, 0, 0 } /* sentinel */
};

/* Getter for '__pyobjc_object__', same result as the 'nsstring' method. */
static PyObject*
nsstring_get__pyobjc_object__(PyObject* self, void* closure __attribute__((__unused__)))
{
    return meth_nsstring(self);
}

static PyGetSetDef nsstring_getsetters[] = {
    {
        "__pyobjc_object__",
        (getter)nsstring_get__pyobjc_object__, NULL,
        "raw NSString instance",
        NULL
    },
    {
        NULL,
        NULL, NULL,
        NULL,
        NULL
    }
};

/*
 * tp_new: instances can only be created from C (PyObjCUnicode_New), always
 * raises TypeError.
 */
static PyObject*
class_new(
    PyTypeObject* type __attribute__((__unused__)),
    PyObject* args __attribute__((__unused__)),
    PyObject* kwds __attribute__((__unused__)))
{
    PyErr_SetString(PyExc_TypeError,
            "Cannot create instances of 'objc.unicode' in Python");
    return NULL;
}

PyTypeObject PyObjCUnicode_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "objc.pyobjc_unicode",                      /* tp_name */
    sizeof(PyObjCUnicodeObject),                /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    class_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    meth_getattro,                              /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                         /* tp_flags */
    class_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyObjCUnicodeObject, weakrefs),    /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    class_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    nsstring_getsetters,                        /* tp_getset */
    &PyUnicode_Type,                            /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    class_new,                                  /* tp_new */
    0,                                          /* tp_free */
    0,                                          /* tp_is_gc */
    0,                                          /* tp_bases */
    0,                                          /* tp_mro */
    0,                                          /* tp_cache */
    0,                                          /* tp_subclasses */
    0,                                          /* tp_weaklist */
    0                                           /* tp_del */
#if PY_VERSION_HEX >= 0x02060000
    , 0                                         /* tp_version_tag */
#endif

};


#if PY_VERSION_HEX >= 0x03030000
/*
 * Python 3.3 introduced a new, more efficient representation
 * for unicode objects.
 *
 * This function cannot use the most efficient
 * representation where the character data is stored in the same
 * memory block as the object header because PyObjCUnicode adds
 * more data to the object header, which PyUnicode does not
 * expect.
 *
 * This function therefore creates a "legacy string, ready" (see
 * unicodeobject.h in the python 3.3 source tree for more information)
 *
 * All character buffers are allocated with PyObject_MALLOC and released
 * with PyObject_FREE. The UTF-16 buffer can end up being owned by the
 * unicode object itself (2 byte kind), so it must come from the same
 * allocator as the other data buffers.
 *
 * Returns a new reference, or NULL with an exception set.
 *
 * XXX: I'm not very happy about this implementation, it is too verbose
 *      and seems to be even more fragile than the implementation for
 *      older python versions.
 */
PyObject*
PyObjCUnicode_New(NSString* value)
{
    PyObjCUnicodeObject* result;
    PyASCIIObject* ascii;
    PyCompactUnicodeObject* compact;

    NSInteger i, length;
    unichar* volatile characters = NULL;
    NSRange range;

    /* Fetch the UTF-16 code units of the string, NUL terminated */
    PyObjC_DURING
        length = [value length];
        characters = PyObject_MALLOC(sizeof(unichar) * (length+1));
        if (characters == NULL) {
            PyErr_NoMemory();
            NS_VALUERETURN(NULL, PyObject*);
        }

        range = NSMakeRange(0, length);

        [value getCharacters: characters range: range];
        characters[length] = 0;

    PyObjC_HANDLER
        if (characters) {
            PyObject_FREE(characters);
            characters = NULL;
        }
        PyObjCErr_FromObjC(localException);
        NS_VALUERETURN(NULL, PyObject*);
    PyObjC_ENDHANDLER

    result = PyObject_New(PyObjCUnicodeObject, &PyObjCUnicode_Type);
    if (result == NULL) {
        PyObject_FREE(characters);
        characters = NULL;
        return NULL;
    }

    /* PyObject_New does not clear the object. Initialize the bridge
     * fields right away to ensure that class_dealloc sees sane values
     * when one of the error paths below drops the object.
     */
    result->weakrefs = NULL;
    result->py_nsstr = NULL;
    result->nsstr = nil;

    ascii = (PyASCIIObject*)result;
    compact = (PyCompactUnicodeObject*)result;

    ascii->hash = -1;
    ascii->wstr = NULL;
    ascii->length = length;

    ascii->state.compact = 0;
    ascii->state.ready = 1;
    ascii->state.interned = SSTATE_NOT_INTERNED;

    compact->utf8_length = 0;
    compact->utf8 = NULL;
    compact->wstr_length = 0;

    result->base.data.any = NULL;

    /* Find the largest code point and count the surrogate pairs, every
     * valid pair ends up as a single code point in the result.
     */
    Py_UCS4 maxchar = 0;
    NSInteger nr_surrogates = 0;
    for (i = 0; i < length; i++) {
        Py_UCS4 cur = (Py_UCS4)characters[i];
        if (Py_UNICODE_IS_HIGH_SURROGATE(cur) && (
                i < length - 1) && (
                Py_UNICODE_IS_LOW_SURROGATE(characters[i+1]))) {
            cur = Py_UNICODE_JOIN_SURROGATES(
                    characters[i],
                    characters[i+1]);
            i++;
            nr_surrogates++;
        }
        if (cur > maxchar) {
            maxchar = cur;
        }
    }

    /* ASCII is the range 0..127, U+0080 is not an ASCII character */
    if (maxchar < 128) {
        ascii->state.ascii = 1;
        ascii->state.kind = PyUnicode_1BYTE_KIND;
    } else if (maxchar <= 255) {
        ascii->state.ascii = 0;
        ascii->state.kind = PyUnicode_1BYTE_KIND;
    } else if (maxchar <= 0xFFFF) {
        ascii->state.ascii = 0;
        ascii->state.kind = PyUnicode_2BYTE_KIND;
    } else {
        ascii->state.ascii = 0;
        ascii->state.kind = PyUnicode_4BYTE_KIND;
    }

    /* Number of code points in the result */
    const NSInteger nchars = length - nr_surrogates;
    ascii->length = nchars;

    /* Create storage for the code points and copy the data.
     *
     * A joined surrogate pair is always larger than 0xFFFF, hence there
     * are no pairs (nr_surrogates == 0) for the 1 byte and 2 byte kinds.
     * Unpaired surrogates are copied as is.
     */
    if (ascii->state.kind == PyUnicode_1BYTE_KIND) {
        Py_UCS1* latin1 = PyObject_MALLOC(sizeof(Py_UCS1) * (nchars + 1));
        if (latin1 == NULL) {
            Py_DECREF((PyObject*)result);
            PyObject_FREE(characters); characters = NULL;
            PyErr_NoMemory();
            return NULL;
        }

        for (i = 0; i < length; i++) {
            latin1[i] = (Py_UCS1)characters[i];
        }
        latin1[length] = 0;
        result->base.data.latin1 = latin1;

        if (ascii->state.ascii) {
            /* With ASCII representation the UTF8 representation is
             * also known without further calculation, and MUST be
             * filled according to the spec
             */
            compact->utf8_length = nchars;
            compact->utf8 = (char*)latin1;
        }

    } else if (ascii->state.kind == PyUnicode_2BYTE_KIND) {
        /* No surrogate pairs and 2BYTE_KIND, this means the unichar
         * buffer can be reused as storage for the python unicode string
         */
        result->base.data.ucs2 = (Py_UCS2*)characters;
        characters = NULL;

#if SIZEOF_WCHAR_T == 2
        ascii->wstr = (wchar_t*)(result->base.data.ucs2);
        compact->wstr_length = ascii->length;
#endif

    } else { /* 4BYTE_KIND */
        Py_UCS4* ucs4 = PyObject_MALLOC(sizeof(Py_UCS4) * (nchars + 1));
        if (ucs4 == NULL) {
            Py_DECREF((PyObject*)result);
            PyObject_FREE(characters); characters = NULL;
            PyErr_NoMemory();
            return NULL;
        }

        /* Uses the same test for surrogate pairs as the scan above, and
         * therefore writes exactly 'nchars' code points. The value of
         * a joined pair is at most 0x10FFFF.
         */
        Py_UCS4* ucs4_cur = ucs4;
        for (i = 0; i < length; i++) {
            Py_UCS4 cur = (Py_UCS4)characters[i];
            if (Py_UNICODE_IS_HIGH_SURROGATE(cur) && (
                    i < length - 1) && (
                    Py_UNICODE_IS_LOW_SURROGATE(characters[i+1]))) {
                cur = Py_UNICODE_JOIN_SURROGATES(
                        characters[i],
                        characters[i+1]);
                i++;
            }
            *ucs4_cur++ = cur;
        }
        *ucs4_cur = 0;
        result->base.data.ucs4 = ucs4;

#if SIZEOF_WCHAR_T == 4
        ascii->wstr = (wchar_t*)(result->base.data.ucs4);
        compact->wstr_length = ascii->length;
#endif
    }


    if (characters != NULL) {
        PyObject_FREE(characters);
        characters = NULL;
    }


#ifdef Py_DEBUG
    /* Check that the unicode object is correct */
    _PyUnicode_CheckConsistency((PyObject*)result, 1);
#endif

    /* Finally store PyUnicode specific data */
    result->nsstr = value;
    CFRetain(value);

    return (PyObject*)result;
}

#else /* Python 3.2 and before */

/*
 * Create a fully initialized PyObjCUnicodeObject with a zero terminated
 * buffer for 'length' characters. The contents of the buffer (other than
 * the first and last element) are not initialized, 'nsstr' is nil.
 *
 * All fields are set before anything can fail to ensure that dropping the
 * object in an error path is safe.
 *
 * Returns NULL with an exception set on failure.
 */
static PyObjCUnicodeObject*
alloc_unicode_proxy(Py_ssize_t length)
{
    PyObjCUnicodeObject* result;
    Py_UNICODE* buffer;

    result = PyObject_New(PyObjCUnicodeObject, &PyObjCUnicode_Type);
    if (result == NULL) {
        return NULL;
    }

    result->base.str = NULL;
    result->base.length = 0;
    result->base.hash = -1;
#if PY_MAJOR_VERSION == 3
    result->base.state = 0;
#endif
    result->base.defenc = NULL;

    result->weakrefs = NULL;
    result->py_nsstr = NULL;
    result->nsstr = nil;

    buffer = PyObject_MALLOC(sizeof(Py_UNICODE) * (length+1));
    if (buffer == NULL) {
        Py_DECREF((PyObject*)result);
        PyErr_NoMemory();
        return NULL;
    }
    buffer[0] = buffer[length] = 0;

    result->base.str = buffer;
    /*PyUnicode_GET_SIZE(result) = length;*/
    result->base.length = length;
    return result;
}

PyObject*
PyObjCUnicode_New(NSString* value)
{
    /* Conversion to PyUnicode without creating an autoreleased object.
     *
     * NOTE: A final optimization is removing the copy of 'characters', but
     *       that can only be done when sizeof(unichar) == Py_UNICODE_SIZE.
     *
     * The reason for doing this: NSThread
     *     +detachNewThreadSelector:toTarget:withObject:, with a string
     *     as one of the arguments.
     *
     * Another reason is that the following loop 'leaks' memory when using
     * -UTF8String:
     *     while True:
     *         NSString.alloc().init()
     *
     * while the following doesn't:
     *
     *     while True:
     *         NSArray.alloc().init()
     *
     * Returns a new reference, or NULL with an exception set.
     */
    PyObjCUnicodeObject* result;

#ifdef PyObjC_UNICODE_FAST_PATH
    Py_ssize_t length = [value length];
    NSRange range;

    if (length < 0) {
        PyErr_SetString(PyExc_SystemError, "string with negative length");
        return NULL;
    }

    result = alloc_unicode_proxy(length);
    if (result == NULL) {
        return NULL;
    }

    /* Copy the characters straight into the buffer of the result */
    range = NSMakeRange(0, length);
    [value getCharacters:(unichar *)PyUnicode_AS_UNICODE(result) range:range];

#else
    Py_ssize_t i, length;
    unichar* volatile characters = NULL;
    NSRange range;

    PyObjC_DURING
        length = [value length];
        characters = PyMem_Malloc(sizeof(unichar) * length);
        if (characters == NULL) {
            PyErr_NoMemory();
            NS_VALUERETURN(NULL, PyObject*);
        }

        range = NSMakeRange(0, length);

        [value getCharacters: characters range: range];

    PyObjC_HANDLER
        if (characters) {
            PyMem_Free(characters);
            characters = NULL;
        }
        PyObjCErr_FromObjC(localException);
        NS_VALUERETURN(NULL, PyObject*);
    PyObjC_ENDHANDLER

    result = alloc_unicode_proxy(length);
    if (result == NULL) {
        PyMem_Free(characters); characters = NULL;
        return NULL;
    }

    /* unichar and Py_UNICODE can have a different size, convert the
     * characters one at a time.
     */
    for (i = 0; i < length; i++) {
        PyUnicode_AS_UNICODE(result)[i] = (Py_UNICODE)(characters[i]);
    }
    PyMem_Free(characters); characters = NULL;
#endif


    if (PyUnicode_GET_SIZE(result) == 0) {
        result->base.hash = 0;
    }

    result->nsstr = value;
    CFRetain(value);

    return (PyObject*)result;
}
#endif /* Python 3.2 and before */

/*
 * Return the NSString wrapped by 'value' (borrowed, no retain is
 * performed). Sets an exception and returns NULL when 'value' is not a
 * PyObjCUnicode instance.
 */
NSString*
PyObjCUnicode_Extract(PyObject* value)
{
    if (!PyObjCUnicode_Check(value)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    return ((PyObjCUnicodeObject*)value)->nsstr;
}