/**
  Mirror _unicodeobject.h

  Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
  produce different external names and thus cause import errors in
  case Python interpreters and extensions with mixed compiled in
  Unicode width assumptions are combined.
  */
module deimos.python.unicodeobject;

import core.stdc.stdarg;
import core.stdc.string;
import deimos.python.pyport;
import deimos.python.object;

extern(C):
// Python-header-file: Include/unicodeobject.h:

/** Py_UNICODE is the native Unicode storage format (code unit) used by
  Python and represents a single Unicode element in the Unicode
  type. */
version (Python_Unicode_UCS2) {
    version (Windows) {
        alias wchar Py_UNICODE;
    } else {
        alias ushort Py_UNICODE;
    }
} else {
    alias uint Py_UNICODE;
}
/** A single code point. Always 32 bits wide, independent of the width of
  Py_UNICODE: aliasing it to Py_UNICODE would truncate it to 16 bits on
  UCS-2 builds. */
alias uint Py_UCS4;
/// 8-bit code unit (Latin-1 storage).
alias ubyte Py_UCS1;
/// 16-bit code unit (UCS-2 storage).
alias ushort Py_UCS2;

version(Python_3_4_Or_Later) {
    /** There are 4 forms of Unicode strings:
       - compact ascii:
         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string are the data)
       - compact:
         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)
       - legacy string, not ready:
         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0
       - legacy string, ready:
         * structure = PyUnicodeObject structure
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.
       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.
       See also _PyUnicode_CheckConsistency().
       Availability >= 3.4
     */
    struct PyASCIIObject {
        mixin PyObject_HEAD;
        /** Number of code points in the string */
        Py_ssize_t length;
        /** Hash value; -1 if not set */
        Py_hash_t hash;
        /// _
        // NOTE(review): presumably packs the interned/kind/compact/ascii/ready
        // flags listed in the comment above - confirm against the C header.
        int state;
        /** wchar_t representation (null-terminated) */
        wchar* wstr;
    }

    /// Availability >= 3.4
    struct PyCompactUnicodeObject {
        /// _
        PyASCIIObject _base;
        /// _
        Py_ssize_t utf8_length;
        /// _
        char* utf8;
        /// _
        Py_ssize_t wstr_length;
    }

    /**
      subclass of PyObject.
      */
    struct PyUnicodeObject {
        PyCompactUnicodeObject _base;
        PyUnicodeObject_data data;
    }

    /// Character buffer of a PyUnicodeObject, viewed at each storage width.
    union PyUnicodeObject_data {
        void* any;
        Py_UCS1* latin1;
        Py_UCS2* ucs2;
        Py_UCS4* ucs4;
    }
}else{
    /**
      subclass of PyObject.
      */
    struct PyUnicodeObject {
        mixin PyObject_HEAD;
        /** Length of raw Unicode data in buffer */
        Py_ssize_t length;
        /** Raw Unicode buffer */
        Py_UNICODE* str;
        /** Hash value; -1 if not set */
        C_long hash;
        /** (Default) Encoded version as Python
          string, or NULL; this is used for
          implementing the buffer protocol */
        PyObject* defenc;
    }
}

/// _
mixin(PyAPI_DATA!"PyTypeObject PyUnicode_Type");

// D translations of C macros:
/** Fast access macros */
// Nonzero when PyObject_TypeCheck accepts op as a PyUnicode_Type instance.
int PyUnicode_Check()(PyObject* op) {
    return PyObject_TypeCheck(op, &PyUnicode_Type);
}
/// ditto
// Nonzero only when op's type is exactly PyUnicode_Type.
int PyUnicode_CheckExact()(PyObject* op) {
    return Py_TYPE(op) == &PyUnicode_Type;
}

// NOTE(review): the four accessors below read the `length` and `str` fields,
// which only the pre-3.4 PyUnicodeObject layout above declares; being
// templates they fail to compile only when instantiated against the
// 3.4+ layout.

/// ditto
// Number of Py_UNICODE units stored in op.
size_t PyUnicode_GET_SIZE()(PyUnicodeObject* op) {
    return op.length;
}
/// ditto
// Size of op's buffer in bytes (length * Py_UNICODE.sizeof).
size_t PyUnicode_GET_DATA_SIZE()(PyUnicodeObject* op) {
    return op.length * Py_UNICODE.sizeof;
}
/// ditto
// Pointer to op's Py_UNICODE buffer.
Py_UNICODE* PyUnicode_AS_UNICODE()(PyUnicodeObject* op) {
    return op.str;
}
/// ditto
// op's Py_UNICODE buffer reinterpreted as read-only bytes.
const(char)* PyUnicode_AS_DATA()(PyUnicodeObject* op) {
    return cast(const(char)*) op.str;
}

/** This Unicode character will be used as replacement character during
  decoding if the errors argument is set to "replace". Note: the
  Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
  Unicode 3.0.
*/
enum Py_UNICODE Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;

// Prefix of the exported C symbols: plain "PyUnicode_" from 3.3 on,
// otherwise mangled according to the Unicode width of the build.
version(Python_3_3_Or_Later) {
    enum PyUnicode_ = "PyUnicode_";
}else version(Python_Unicode_UCS2) {
    enum PyUnicode_ = "PyUnicodeUCS2_";
}else{
    enum PyUnicode_ = "PyUnicodeUCS4_";
}

// Rewrites declaration source text so that every declaration
//
//     T PyUnicode_XX(args);
//
// is emitted under its mangled name (prefix taken from the PyUnicode_ enum
// above), followed by a "/// ditto" line and
//
//     alias <mangled name> PyUnicode_XX;
//
// Names starting with "_PyUnicode" are handled the same way. Block comments
// and all other text are copied through verbatim.
//
// Params:
//     code = D source text holding the declarations
// Returns:
//     the rewritten source text. Malformed input (an unterminated block
//     comment, a name with no "(" after it, or a declaration with no ";")
//     stops processing; output produced up to that point is returned.
string substitute_and_alias()(string code) {
    import std.algorithm;
    import std.array;
    string[] newcodes;
LOOP:
    while(true) {
        if(startsWith(code,"/*")) {
            // The search starts after the opening "/*", so the offset found
            // is relative to code[2 .. $].
            ptrdiff_t comm_end_index = countUntil(code[2 .. $], "*/");
            if(comm_end_index < 0) break;
            // +2 for the opening "/*" that was skipped, +2 for the closing
            // "*/" itself: copy the whole comment and resume right after it.
            // This also guarantees progress on an empty "/**/" comment.
            size_t comm_len = comm_end_index + 4;
            newcodes ~= code[0 .. comm_len];
            code = code[comm_len .. $];
            continue;
        }
        if(!(startsWith(code,"PyUnicode_") || startsWith(code,"_PyUnicode"))) {
            // Plain text: copy everything up to the next name or comment.
            size_t index = 0;
            while(index < code.length) {
                if(code[index] == '_') {
                    if(startsWith(code[index .. $], "_PyUnicode_")) {
                        break;
                    }
                }else if(code[index] == 'P') {
                    if(startsWith(code[index .. $], "PyUnicode_")) {
                        break;
                    }
                }else if(code[index] == '/') {
                    if(startsWith(code[index .. $], "/*")) {
                        break;
                    }
                }
                index++;
            }
            if(index == code.length) {
                // Nothing left to rewrite: keep the trailing text (for
                // example closing braces) instead of dropping it.
                newcodes ~= code;
                break;
            }
            newcodes ~= code[0 .. index];
            code = code[index .. $];
            continue;
        }
        // code now starts with the function name; it runs up to the "(".
        ptrdiff_t end_index = countUntil(code, "(");
        if(end_index < 0) break;
        string alias_name = code[0 .. end_index];
        string func_name = replace(alias_name, "PyUnicode_", PyUnicode_);
        // Walk to the matching ")" of the parameter list, skipping block
        // comments so parentheses inside them are not counted.
        size_t index0 = end_index+1;
        int parencount = 1;
        while(parencount && index0 < code.length) {
            if(startsWith(code[index0 .. $], "/*")) {
                ptrdiff_t comm_end_index = countUntil(code[index0+2 .. $], "*/");
                if(comm_end_index < 0) break LOOP;
                // Offset is relative to index0+2; step past the "*/" too.
                index0 += comm_end_index + 4;
                continue;
            }else if(code[index0] == '(') {
                parencount++;
                index0++;
            }else if(code[index0] == ')') {
                parencount--;
                index0++;
            }else{
                index0++;
            }
        }
        ptrdiff_t semi = countUntil(code[index0 .. $], ";");
        if(semi < 0) break;
        index0 += semi+1;

        string alias_line = "\nalias " ~ func_name ~ " " ~ alias_name ~ ";\n";
        newcodes ~= func_name;
        newcodes ~= code[end_index .. index0];
        newcodes ~= "\n /// ditto \n";
        newcodes ~= alias_line;

        code = code[index0 .. $];
    }

    string newcode;
    foreach(c; newcodes) {
        newcode ~= c;
    }
    return newcode;
}

enum string unicode_funs = q{
version(Python_2_6_Or_Later) {

    /** Create a Unicode Object from the Py_UNICODE buffer u of the given
      size.

      u may be NULL which causes the contents to be undefined. It is the
      user's responsibility to fill in the needed data afterwards. Note
      that modifying the Unicode object contents after construction is
      only allowed if u was set to NULL.

      The buffer is copied into the new object.
*/ 296 /// Availability: >= 2.6 297 PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 298 299 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 300 /// Availability: >= 2.6 301 PyObject* PyUnicode_FromStringAndSize( 302 const(char)*u, /* char buffer */ 303 Py_ssize_t size /* size of buffer */ 304 ); 305 306 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 307 Latin-1 encoded bytes */ 308 /// Availability: >= 2.6 309 PyObject* PyUnicode_FromString( 310 const(char)*u /* string */ 311 ); 312 /// Availability: >= 2.6 313 PyObject* PyUnicode_FromFormatV(const(char)*, va_list); 314 /// Availability: >= 2.6 315 PyObject* PyUnicode_FromFormat(const(char)*, ...); 316 317 /** Format the object based on the format_spec, as defined in PEP 3101 318 (Advanced String Formatting). */ 319 /// Availability: >= 2.6 320 PyObject* _PyUnicode_FormatAdvanced(PyObject *obj, 321 Py_UNICODE *format_spec, 322 Py_ssize_t format_spec_len); 323 /// Availability: >= 2.6 324 int PyUnicode_ClearFreeList(); 325 /** 326 Params: 327 string = UTF-7 encoded string 328 length = size of string 329 error = error handling 330 consumed = bytes consumed 331 */ 332 /// Availability: >= 2.6 333 PyObject* PyUnicode_DecodeUTF7Stateful( 334 const(char)* string, 335 Py_ssize_t length, 336 const(char)*errors, 337 Py_ssize_t *consumed 338 ); 339 /** 340 Params: 341 string = UTF-32 encoded string 342 length = size of string 343 error = error handling 344 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 345 */ 346 /// Availability: >= 2.6 347 PyObject* PyUnicode_DecodeUTF32( 348 const(char)* string, 349 Py_ssize_t length, 350 const(char)*errors, 351 int *byteorder 352 ); 353 354 /** 355 Params: 356 string = UTF-32 encoded string 357 length = size of string 358 error = error handling 359 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 360 */ 361 /// Availability: >= 2.6 362 PyObject* 
PyUnicode_DecodeUTF32Stateful( 363 const(char)*string, 364 Py_ssize_t length, 365 const(char)*errors, 366 int *byteorder, 367 Py_ssize_t *consumed 368 ); 369 /** Returns a Python string using the UTF-32 encoding in native byte 370 order. The string always starts with a BOM mark. */ 371 /// Availability: >= 2.6 372 373 PyObject* PyUnicode_AsUTF32String( 374 PyObject *unicode 375 ); 376 377 /** Returns a Python string object holding the UTF-32 encoded value of 378 the Unicode data. 379 380 If byteorder is not 0, output is written according to the following 381 byte order: 382 383 byteorder == -1: little endian 384 byteorder == 0: native byte order (writes a BOM mark) 385 byteorder == 1: big endian 386 387 If byteorder is 0, the output string will always start with the 388 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 389 prepended. 390 Params: 391 data = Unicode char buffer 392 length = number of Py_UNICODE chars to encode 393 errors = error handling 394 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 395 396 */ 397 /// Availability: >= 2.6 398 PyObject* PyUnicode_EncodeUTF32( 399 const Py_UNICODE *data, 400 Py_ssize_t length, 401 const(char)* errors, 402 int byteorder 403 ); 404 } 405 406 /** Return a read-only pointer to the Unicode object's internal 407 Py_UNICODE buffer. */ 408 Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode); 409 /** Get the length of the Unicode object. */ 410 Py_ssize_t PyUnicode_GetSize(PyObject* unicode); 411 412 /** Get the maximum ordinal for a Unicode character. */ 413 Py_UNICODE PyUnicode_GetMax(); 414 415 /** Resize an already allocated Unicode object to the new size length. 416 417 _*unicode is modified to point to the new (resized) object and 0 418 returned on success. 419 420 This API may only be called by the function which also called the 421 Unicode constructor. The refcount on the object must be 1. Otherwise, 422 an error is returned. 
423 424 Error handling is implemented as follows: an exception is set, -1 425 is returned and *unicode left untouched. 426 Params: 427 unicode = pointer to the new unicode object. 428 length = New length. 429 430 */ 431 int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length); 432 /** Coerce obj to an Unicode object and return a reference with 433 _*incremented* refcount. 434 435 Coercion is done in the following way: 436 437 1. String and other char buffer compatible objects are decoded 438 under the assumptions that they contain data using the current 439 default encoding. Decoding is done in "strict" mode. 440 441 2. All other objects (including Unicode objects) raise an 442 exception. 443 444 The API returns NULL in case of an error. The caller is responsible 445 for decref'ing the returned objects. 446 447 */ 448 PyObject* PyUnicode_FromEncodedObject( 449 PyObject* obj, 450 const(char)* encoding, 451 const(char)* errors); 452 453 /** Coerce obj to an Unicode object and return a reference with 454 _*incremented* refcount. 455 456 Unicode objects are passed back as-is (subclasses are converted to 457 true Unicode objects), all other objects are delegated to 458 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 459 using the default encoding as basis for decoding the object. 460 461 The API returns NULL in case of an error. The caller is responsible 462 for decref'ing the returned objects. 463 464 */ 465 PyObject* PyUnicode_FromObject(PyObject* obj); 466 467 /** Create a Unicode Object from the whcar_t buffer w of the given 468 size. 469 470 The buffer is copied into the new object. */ 471 PyObject* PyUnicode_FromWideChar(const(wchar)* w, Py_ssize_t size); 472 473 /** Copies the Unicode Object contents into the wchar_t buffer w. At 474 most size wchar_t characters are copied. 475 476 Note that the resulting wchar_t string may or may not be 477 0-terminated. 
It is the responsibility of the caller to make sure 478 that the wchar_t string is 0-terminated in case this is required by 479 the application. 480 481 Returns the number of wchar_t characters copied (excluding a 482 possibly trailing 0-termination character) or -1 in case of an 483 error. */ 484 Py_ssize_t PyUnicode_AsWideChar( 485 PyUnicodeObject* unicode, 486 const(wchar)* w, 487 Py_ssize_t size); 488 489 /** Create a Unicode Object from the given Unicode code point ordinal. 490 491 The ordinal must be in range(0x10000) on narrow Python builds 492 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 493 raised in case it is not. 494 495 */ 496 PyObject* PyUnicode_FromOrdinal(int ordinal); 497 498 /** Return a Python string holding the default encoded value of the 499 Unicode object. 500 501 The resulting string is cached in the Unicode object for subsequent 502 usage by this function. The cached version is needed to implement 503 the character buffer interface and will live (at least) as long as 504 the Unicode object itself. 505 506 The refcount of the string is *not* incremented. 507 508 _*** Exported for internal use by the interpreter only !!! *** 509 510 */ 511 PyObject* _PyUnicode_AsDefaultEncodedString(PyObject *, const(char)*); 512 513 /** Returns the currently active default encoding. 514 515 The default encoding is currently implemented as run-time settable 516 process global. This may change in future versions of the 517 interpreter to become a parameter which is managed on a per-thread 518 basis. 519 520 */ 521 const(char)* PyUnicode_GetDefaultEncoding(); 522 523 /** Sets the currently active default encoding. 524 525 Returns 0 on success, -1 in case of an error. 526 527 */ 528 int PyUnicode_SetDefaultEncoding(const(char)*encoding); 529 530 /** Create a Unicode object by decoding the encoded string s of the 531 given size. 
532 Params: 533 s = encoded string 534 size = size of buffer 535 encoding = encoding 536 errors = error handling 537 */ 538 PyObject* PyUnicode_Decode( 539 const(char)* s, 540 Py_ssize_t size, 541 const(char)* encoding, 542 const(char)* errors); 543 544 version(Python_3_6_Or_Later) { 545 /** Decode a Unicode object unicode and return the result as Python 546 object. */ 547 /// Deprecated in 3.6 548 deprecated("Deprecated in 3.6") 549 PyObject* PyUnicode_AsDecodedObject( 550 PyObject* unicode, 551 const(char)* encoding, 552 const(char)* errors 553 ); 554 /** Decode a Unicode object unicode and return the result as Unicode 555 object. */ 556 /// Availability: 3.* 557 558 /// Deprecated in 3.6 559 deprecated("Deprecated in 3.6") 560 PyObject* PyUnicode_AsDecodedUnicode( 561 PyObject* unicode, 562 const(char)* encoding, 563 const(char)* errors 564 ); 565 }else version(Python_3_0_Or_Later) { 566 /** Decode a Unicode object unicode and return the result as Python 567 object. */ 568 /// Availability: 3.* 569 PyObject* PyUnicode_AsDecodedObject( 570 PyObject* unicode, 571 const(char)* encoding, 572 const(char)* errors 573 ); 574 /** Decode a Unicode object unicode and return the result as Unicode 575 object. */ 576 /// Availability: 3.* 577 578 PyObject* PyUnicode_AsDecodedUnicode( 579 PyObject* unicode, 580 const(char)* encoding, 581 const(char)* errors 582 ); 583 } 584 585 /** Encodes a Py_UNICODE buffer of the given size and returns a 586 Python string object. 587 Params: 588 s = Unicode char buffer 589 size = number of Py_UNICODE chars to encode 590 encoding = encoding 591 errors = error handling 592 */ 593 PyObject* PyUnicode_Encode( 594 Py_UNICODE* s, 595 Py_ssize_t size, 596 const(char)* encoding, 597 const(char)* errors); 598 599 version(Python_3_6_Or_Later) { 600 /** Encodes a Unicode object and returns the result as Python object. 
601 */ 602 deprecated("Deprecated in 3.6") 603 PyObject* PyUnicode_AsEncodedObject( 604 PyObject* unicode, 605 const(char)* encoding, 606 const(char)* errors); 607 }else{ 608 /** Encodes a Unicode object and returns the result as Python object. 609 */ 610 PyObject* PyUnicode_AsEncodedObject( 611 PyObject* unicode, 612 const(char)* encoding, 613 const(char)* errors); 614 } 615 616 /** Encodes a Unicode object and returns the result as Python string 617 object. */ 618 PyObject* PyUnicode_AsEncodedString( 619 PyObject* unicode, 620 const(char)* encoding, 621 const(char)* errors); 622 623 version(Python_3_0_Or_Later) { 624 /** Encodes a Unicode object and returns the result as Unicode 625 object. */ 626 deprecated("Deprecated in 3.6") 627 PyObject* PyUnicode_AsEncodedUnicode( 628 PyObject* unicode, 629 const(char)* encoding, 630 const(char)* errors 631 ); 632 }else version(Python_3_0_Or_Later) { 633 /** Encodes a Unicode object and returns the result as Unicode 634 object. */ 635 /// Availability: >= 3.* 636 PyObject* PyUnicode_AsEncodedUnicode( 637 PyObject* unicode, 638 const(char)* encoding, 639 const(char)* errors 640 ); 641 } 642 643 /** 644 Params: 645 string = UTF-7 encoded string 646 length = size of string 647 errors = error handling 648 */ 649 PyObject* PyUnicode_DecodeUTF7( 650 const(char)* string, 651 Py_ssize_t length, 652 const(char)* errors); 653 654 /** 655 Params: 656 data = Unicode char buffer 657 length = number of Py_UNICODE chars to encode 658 base64SetO = Encode RFC2152 Set O characters in base64 659 base64WhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 660 errors = error handling 661 */ 662 PyObject* PyUnicode_EncodeUTF7( 663 Py_UNICODE* data, 664 Py_ssize_t length, 665 int encodeSetO, 666 int encodeWhiteSpace, 667 const(char)* errors 668 ); 669 670 /// _ 671 PyObject* PyUnicode_DecodeUTF8( 672 const(char)* string, 673 Py_ssize_t length, 674 const(char)* errors); 675 /// _ 676 PyObject* PyUnicode_DecodeUTF8Stateful( 677 const(char)* 
string, 678 Py_ssize_t length, 679 const(char)* errors, 680 Py_ssize_t* consumed 681 ); 682 /// _ 683 PyObject* PyUnicode_AsUTF8String(PyObject* unicode); 684 /// _ 685 PyObject* PyUnicode_EncodeUTF8( 686 Py_UNICODE* data, 687 Py_ssize_t length, 688 const(char) *errors); 689 690 /** Decodes length bytes from a UTF-16 encoded buffer string and returns 691 the corresponding Unicode object. 692 693 errors (if non-NULL) defines the error handling. It defaults 694 to "strict". 695 696 If byteorder is non-NULL, the decoder starts decoding using the 697 given byte order: 698 699 *byteorder == -1: little endian 700 *byteorder == 0: native order 701 *byteorder == 1: big endian 702 703 In native mode, the first two bytes of the stream are checked for a 704 BOM mark. If found, the BOM mark is analysed, the byte order 705 adjusted and the BOM skipped. In the other modes, no BOM mark 706 interpretation is done. After completion, *byteorder is set to the 707 current byte order at the end of input data. 708 709 If byteorder is NULL, the codec starts in native order mode. 710 711 */ 712 PyObject* PyUnicode_DecodeUTF16( 713 const(char)* string, 714 Py_ssize_t length, 715 const(char)* errors, 716 int* byteorder); 717 /** 718 Params: 719 string = UTF-16 encoded string 720 length = size of string 721 errors = error handling 722 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 723 consumed = bytes consumed 724 */ 725 PyObject* PyUnicode_DecodeUTF16Stateful( 726 const(char)* string, 727 Py_ssize_t length, 728 const(char)* errors, 729 int* byteorder, 730 Py_ssize_t* consumed 731 ); 732 /** Returns a Python string using the UTF-16 encoding in native byte 733 order. The string always starts with a BOM mark. */ 734 PyObject* PyUnicode_AsUTF16String(PyObject *unicode); 735 /** Returns a Python string object holding the UTF-16 encoded value of 736 the Unicode data. 
737 738 If byteorder is not 0, output is written according to the following 739 byte order: 740 741 byteorder == -1: little endian 742 byteorder == 0: native byte order (writes a BOM mark) 743 byteorder == 1: big endian 744 745 If byteorder is 0, the output string will always start with the 746 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 747 prepended. 748 749 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 750 UCS-2. This trick makes it possible to add full UTF-16 capabilities 751 at a later point without compromising the APIs. 752 753 */ 754 PyObject* PyUnicode_EncodeUTF16( 755 Py_UNICODE* data, 756 Py_ssize_t length, 757 const(char)* errors, 758 int byteorder 759 ); 760 761 /// _ 762 PyObject* PyUnicode_DecodeUnicodeEscape( 763 const(char)* string, 764 Py_ssize_t length, 765 const(char)* errors); 766 /// _ 767 PyObject* PyUnicode_AsUnicodeEscapeString( 768 PyObject* unicode); 769 /// _ 770 PyObject* PyUnicode_EncodeUnicodeEscape( 771 Py_UNICODE* data, 772 Py_ssize_t length); 773 /** 774 Params: 775 string = Raw-Unicode-Escape encoded string 776 length = size of string 777 errors = error handling 778 */ 779 PyObject* PyUnicode_DecodeRawUnicodeEscape( 780 const(char)* string, 781 Py_ssize_t length, 782 const(char)* errors); 783 /// _ 784 PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode); 785 /// _ 786 PyObject* PyUnicode_EncodeRawUnicodeEscape( 787 Py_UNICODE* data, Py_ssize_t length); 788 789 /// _ 790 PyObject* _PyUnicode_DecodeUnicodeInternal( 791 const(char)* string, 792 Py_ssize_t length, 793 const(char)* errors); 794 795 /** 796 Params: 797 string = Latin-1 encoded string 798 length = size of string 799 errors = error handling 800 */ 801 PyObject* PyUnicode_DecodeLatin1( 802 const(char)* string, 803 Py_ssize_t length, 804 const(char)* errors); 805 /// _ 806 PyObject* PyUnicode_AsLatin1String(PyObject *unicode); 807 /** 808 Params: 809 data = Unicode char buffer 810 length = Number of Py_UNICODE chars to 
encode 811 errors = error handling 812 */ 813 PyObject* PyUnicode_EncodeLatin1( 814 Py_UNICODE* data, 815 Py_ssize_t length, 816 const(char)* errors); 817 818 /** 819 Params: 820 data = Unicode char buffer 821 length = Number of Py_UNICODE chars to encode 822 errors = error handling 823 */ 824 PyObject* PyUnicode_DecodeASCII( 825 const(char)* string, 826 Py_ssize_t length, 827 const(char)* errors); 828 /// _ 829 PyObject* PyUnicode_AsASCIIString(PyObject *unicode); 830 /** 831 Params: 832 data = Unicode char buffer 833 length = Number of Py_UNICODE chars to encode 834 errors = error handling 835 */ 836 PyObject* PyUnicode_EncodeASCII( 837 Py_UNICODE* data, 838 Py_ssize_t length, 839 const(char)* errors); 840 841 /** 842 Params: 843 string = Encoded string 844 length = size of string 845 mapping = character mapping (char ordinal -> unicode ordinal) 846 errors = error handling 847 */ 848 PyObject* PyUnicode_DecodeCharmap( 849 const(char)* string, 850 Py_ssize_t length, 851 PyObject* mapping, 852 const(char)* errors 853 ); 854 /** 855 Params: 856 unicode = Unicode object 857 mapping = character mapping (unicode ordinal -> char ordinal) 858 */ 859 PyObject* PyUnicode_AsCharmapString( 860 PyObject* unicode, 861 PyObject* mapping); 862 /** 863 Params: 864 data = Unicode char buffer 865 length = Number of Py_UNICODE chars to encode 866 mapping = character mapping (unicode ordinal -> char ordinal) 867 errors = error handling 868 */ 869 PyObject* PyUnicode_EncodeCharmap( 870 Py_UNICODE* data, 871 Py_ssize_t length, 872 PyObject* mapping, 873 const(char)* errors 874 ); 875 /** Translate a Py_UNICODE buffer of the given length by applying a 876 character mapping table to it and return the resulting Unicode 877 object. 878 879 The mapping table must map Unicode ordinal integers to Unicode 880 ordinal integers or None (causing deletion of the character). 881 882 Mapping tables may be dictionaries or sequences. 
Unmapped character 883 ordinals (ones which cause a LookupError) are left untouched and 884 are copied as-is. 885 886 */ 887 PyObject* PyUnicode_TranslateCharmap( 888 Py_UNICODE* data, 889 Py_ssize_t length, 890 PyObject* table, 891 const(char)* errors 892 ); 893 894 version (Windows) { 895 /// Availability: Windows only 896 PyObject* PyUnicode_DecodeMBCS( 897 const(char)* string, 898 Py_ssize_t length, 899 const(char)* errors); 900 /// Availability: Windows only 901 PyObject* PyUnicode_AsMBCSString(PyObject* unicode); 902 /// Availability: Windows only 903 PyObject* PyUnicode_EncodeMBCS( 904 Py_UNICODE* data, 905 Py_ssize_t length, 906 const(char)* errors); 907 } 908 /** Takes a Unicode string holding a decimal value and writes it into 909 an output buffer using standard ASCII digit codes. 910 911 The output buffer has to provide at least length+1 bytes of storage 912 area. The output string is 0-terminated. 913 914 The encoder converts whitespace to ' ', decimal characters to their 915 corresponding ASCII digit and all other Latin-1 characters except 916 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 917 are treated as errors. This includes embedded NULL bytes. 918 919 Error handling is defined by the errors argument: 920 921 NULL or "strict": raise a ValueError 922 "ignore": ignore the wrong characters (these are not copied to the 923 output buffer) 924 "replace": replaces illegal characters with '?' 925 926 Returns 0 on success, -1 on failure. 927 928 */ 929 int PyUnicode_EncodeDecimal( 930 Py_UNICODE* s, 931 Py_ssize_t length, 932 char* output, 933 const(char)* errors); 934 935 /** Concat two strings giving a new Unicode string. 
*/ 936 PyObject* PyUnicode_Concat( 937 PyObject* left, 938 PyObject* right); 939 940 version(Python_3_0_Or_Later) { 941 /** Concat two strings and put the result in *pleft 942 (sets *pleft to NULL on error) 943 Params: 944 pleft = Pointer to left string 945 right = Right string 946 */ 947 /// Availability: 3.* 948 949 void PyUnicode_Append( 950 PyObject** pleft, 951 PyObject* right 952 ); 953 954 /** Concat two strings, put the result in *pleft and drop the right object 955 (sets *pleft to NULL on error) 956 Params: 957 pleft = Pointer to left string 958 */ 959 /// Availability: 3.* 960 void PyUnicode_AppendAndDel( 961 PyObject** pleft, 962 PyObject* right 963 ); 964 } 965 966 /** Split a string giving a list of Unicode strings. 967 968 If sep is NULL, splitting will be done at all whitespace 969 substrings. Otherwise, splits occur at the given separator. 970 971 At most maxsplit splits will be done. If negative, no limit is set. 972 973 Separators are not included in the resulting list. 974 975 */ 976 PyObject* PyUnicode_Split( 977 PyObject* s, 978 PyObject* sep, 979 Py_ssize_t maxsplit); 980 981 /** Ditto PyUnicode_Split, but split at line breaks. 982 983 CRLF is considered to be one line break. Line breaks are not 984 included in the resulting list. */ 985 PyObject* PyUnicode_Splitlines( 986 PyObject* s, 987 int keepends); 988 989 version(Python_2_5_Or_Later) { 990 /** Partition a string using a given separator. */ 991 /// Availability: >= 2.5 992 PyObject* PyUnicode_Partition( 993 PyObject* s, 994 PyObject* sep 995 ); 996 997 /** Partition a string using a given separator, searching from the end 998 of the string. */ 999 1000 PyObject* PyUnicode_RPartition( 1001 PyObject* s, 1002 PyObject* sep 1003 ); 1004 } 1005 1006 /** Split a string giving a list of Unicode strings. 1007 1008 If sep is NULL, splitting will be done at all whitespace 1009 substrings. Otherwise, splits occur at the given separator. 1010 1011 At most maxsplit splits will be done. 
But unlike PyUnicode_Split 1012 PyUnicode_RSplit splits from the end of the string. If negative, 1013 no limit is set. 1014 1015 Separators are not included in the resulting list. 1016 1017 */ 1018 PyObject* PyUnicode_RSplit( 1019 PyObject* s, 1020 PyObject* sep, 1021 Py_ssize_t maxsplit); 1022 1023 /** Translate a string by applying a character mapping table to it and 1024 return the resulting Unicode object. 1025 1026 The mapping table must map Unicode ordinal integers to Unicode 1027 ordinal integers or None (causing deletion of the character). 1028 1029 Mapping tables may be dictionaries or sequences. Unmapped character 1030 ordinals (ones which cause a LookupError) are left untouched and 1031 are copied as-is. 1032 1033 */ 1034 PyObject* PyUnicode_Translate( 1035 PyObject* str, 1036 PyObject* table, 1037 const(char)* errors); 1038 1039 /** Join a sequence of strings using the given separator and return 1040 the resulting Unicode string. */ 1041 PyObject* PyUnicode_Join( 1042 PyObject* separator, 1043 PyObject* seq); 1044 1045 /** Return 1 if substr matches str[start:end] at the given tail end, 0 1046 otherwise. */ 1047 Py_ssize_t PyUnicode_Tailmatch( 1048 PyObject* str, 1049 PyObject* substr, 1050 Py_ssize_t start, 1051 Py_ssize_t end, 1052 int direction 1053 ); 1054 1055 /** Return the first position of substr in str[start:end] using the 1056 given search direction or -1 if not found. -2 is returned in case 1057 an error occurred and an exception is set. */ 1058 Py_ssize_t PyUnicode_Find( 1059 PyObject* str, 1060 PyObject* substr, 1061 Py_ssize_t start, 1062 Py_ssize_t end, 1063 int direction 1064 ); 1065 1066 /** Count the number of occurrences of substr in str[start:end]. */ 1067 Py_ssize_t PyUnicode_Count( 1068 PyObject* str, 1069 PyObject* substr, 1070 Py_ssize_t start, 1071 Py_ssize_t end); 1072 1073 /** Replace at most maxcount occurrences of substr in str with replstr 1074 and return the resulting Unicode object. 
*/ 1075 PyObject* PyUnicode_Replace( 1076 PyObject* str, 1077 PyObject* substr, 1078 PyObject* replstr, 1079 Py_ssize_t maxcount 1080 ); 1081 1082 /** Compare two strings and return -1, 0, 1 for less than, equal, 1083 greater than resp. */ 1084 int PyUnicode_Compare(PyObject* left, PyObject* right); 1085 version(Python_3_0_Or_Later) { 1086 /** Compare two strings and return -1, 0, 1 for less than, equal, 1087 greater than resp. 1088 Params: 1089 left = 1090 right = ASCII-encoded string 1091 */ 1092 /// Availability: 3.* 1093 int PyUnicode_CompareWithASCIIString( 1094 PyObject* left, 1095 const(char)* right 1096 ); 1097 } 1098 1099 version(Python_2_5_Or_Later) { 1100 /** Rich compare two strings and return one of the following: 1101 1102 - NULL in case an exception was raised 1103 - Py_True or Py_False for successfuly comparisons 1104 - Py_NotImplemented in case the type combination is unknown 1105 1106 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 1107 case the conversion of the arguments to Unicode fails with a 1108 UnicodeDecodeError. 1109 1110 Possible values for op: 1111 1112 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 1113 1114 */ 1115 /// Availability: >= 2.5 1116 PyObject* PyUnicode_RichCompare( 1117 PyObject* left, 1118 PyObject* right, 1119 int op 1120 ); 1121 } 1122 1123 /** Apply a argument tuple or dictionary to a format string and return 1124 the resulting Unicode string. */ 1125 PyObject* PyUnicode_Format(PyObject* format, PyObject* args); 1126 1127 /** Checks whether element is contained in container and return 1/0 1128 accordingly. 1129 1130 element has to coerce to an one element Unicode string. -1 is 1131 returned in case of an error. */ 1132 int PyUnicode_Contains(PyObject* container, PyObject* element); 1133 1134 version(Python_3_0_Or_Later) { 1135 /** Checks whether argument is a valid identifier. 
*/ 1136 /// Availability: 3.* 1137 int PyUnicode_IsIdentifier(PyObject* s); 1138 } 1139 1140 1141 /// _ 1142 int _PyUnicode_IsLowercase(Py_UNICODE ch); 1143 /// _ 1144 int _PyUnicode_IsUppercase(Py_UNICODE ch); 1145 /// _ 1146 int _PyUnicode_IsTitlecase(Py_UNICODE ch); 1147 /// _ 1148 int _PyUnicode_IsWhitespace(Py_UNICODE ch); 1149 /// _ 1150 int _PyUnicode_IsLinebreak(Py_UNICODE ch); 1151 /// _ 1152 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch); 1153 /// _ 1154 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch); 1155 /// _ 1156 Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch); 1157 /// _ 1158 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch); 1159 /// _ 1160 int _PyUnicode_ToDigit(Py_UNICODE ch); 1161 /// _ 1162 double _PyUnicode_ToNumeric(Py_UNICODE ch); 1163 /// _ 1164 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch); 1165 /// _ 1166 int _PyUnicode_IsDigit(Py_UNICODE ch); 1167 /// _ 1168 int _PyUnicode_IsNumeric(Py_UNICODE ch); 1169 /// _ 1170 int _PyUnicode_IsAlpha(Py_UNICODE ch); 1171 1172 }; 1173 1174 /* 1175 pragma(msg,substitute_and_alias(unicode_funs)); 1176 mixin(substitute_and_alias(unicode_funs)); 1177 */ 1178 1179 // waaaa! calling substitute_and_alias breaks linking! 1180 // oh, well. this is probably faster anyways. 1181 // following code is generated by substitute_and_alias. 1182 // don't modify it; modify unicode_funs! 1183 version(Python_3_3_Or_Later) { 1184 version(Python_2_6_Or_Later) { 1185 1186 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 1187 size. 1188 1189 u may be NULL which causes the contents to be undefined. It is the 1190 user's responsibility to fill in the needed data afterwards. Note 1191 that modifying the Unicode object contents after construction is 1192 only allowed if u was set to NULL. 1193 1194 The buffer is copied into the new object. 
*/
    /// Availability: >= 2.6
    PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size);

    /** Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded
      bytes.
      Params:
      u = char buffer
      size = size of buffer
     */
    /// Availability: >= 2.6
    PyObject* PyUnicode_FromStringAndSize(
            const(char)* u,
            Py_ssize_t size
            );

    /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated
      UTF-8 encoded bytes.
      Params:
      u = string
     */
    /// Availability: >= 2.6
    PyObject* PyUnicode_FromString(
            const(char)* u
            );

    /// Availability: >= 2.6
    PyObject* PyUnicode_FromFormatV(const(char)*, va_list);

    /// Availability: >= 2.6
    PyObject* PyUnicode_FromFormat(const(char)*, ...);

    /** Format the object based on the format_spec, as defined in PEP 3101
      (Advanced String Formatting). */
    /// Availability: >= 2.6
    PyObject* _PyUnicode_FormatAdvanced(
            PyObject* obj,
            Py_UNICODE* format_spec,
            Py_ssize_t format_spec_len);

    /// Availability: >= 2.6
    int PyUnicode_ClearFreeList();

    /**
    Params:
    string = UTF-7 encoded string
    length = size of string
    errors = error handling
    consumed = bytes consumed
    */
    /// Availability: >= 2.6
    PyObject* PyUnicode_DecodeUTF7Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            Py_ssize_t* consumed
            );

    /**
    Params:
    string = UTF-32 encoded string
    length = size of string
    errors = error handling
    byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
    */
    /// Availability: >= 2.6
    PyObject* PyUnicode_DecodeUTF32(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder
            );

    /**
    Params:
    string = UTF-32 encoded string
    length = size of string
    errors = error handling
    byteorder = pointer to byteorder to use
0=native;-1=LE,1=BE; updated on exit
    consumed = bytes consumed
    */
    /// Availability: >= 2.6
    PyObject* PyUnicode_DecodeUTF32Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder,
            Py_ssize_t* consumed
            );

    /** Returns a Python string using the UTF-32 encoding in native byte
      order. The string always starts with a BOM mark. */
    /// Availability: >= 2.6
    PyObject* PyUnicode_AsUTF32String(
            PyObject* unicode
            );

    /** Returns a Python string object holding the UTF-32 encoded value of
      the Unicode data.

      If byteorder is not 0, output is written according to the following
      byte order:

      byteorder == -1: little endian
      byteorder == 0:  native byte order (writes a BOM mark)
      byteorder == 1:  big endian

      If byteorder is 0, the output string will always start with the
      Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
      prepended.
      Params:
      data = Unicode char buffer
      length = number of Py_UNICODE chars to encode
      errors = error handling
      byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE

     */
    /// Availability: >= 2.6
    PyObject* PyUnicode_EncodeUTF32(
            const Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors,
            int byteorder
            );

    }

    /** Return a read-only pointer to the Unicode object's internal
      Py_UNICODE buffer. */
    Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode);

    /** Get the length of the Unicode object. */
    Py_ssize_t PyUnicode_GetSize(PyObject* unicode);

    /** Get the maximum ordinal for a Unicode character. */
    Py_UNICODE PyUnicode_GetMax();

    /** Resize an already allocated Unicode object to the new size length.

      _*unicode is modified to point to the new (resized) object and 0
      returned on success.
This API may only be called by the function which also called the
      Unicode constructor, and the refcount on the object must be 1;
      otherwise an error is returned.

      On error an exception is set, -1 is returned and the object that
      unicode points to is left untouched.
      Params:
      unicode = pointer to the new unicode object.
      length = New length.

     */
    int PyUnicode_Resize(
            PyObject** unicode,
            Py_ssize_t length);

    /** Coerce obj to an Unicode object and return a reference with
      incremented refcount.

      The coercion rules are:

      1. String and other char buffer compatible objects are decoded
         under the assumptions that they contain data using the current
         default encoding. Decoding is done in "strict" mode.

      2. All other objects (including Unicode objects) raise an
         exception.

      NULL is returned in case of an error. The caller is responsible
      for decref'ing the returned objects.

     */
    PyObject* PyUnicode_FromEncodedObject(PyObject* obj, const(char)* encoding, const(char)* errors);

    /** Coerce obj to an Unicode object and return a reference with
      incremented refcount.

      Unicode objects are passed back as-is (subclasses are converted to
      true Unicode objects). Every other object is delegated to
      PyUnicode_FromEncodedObject(obj, NULL, "strict"), which results in
      using the default encoding as basis for decoding the object.

      NULL is returned in case of an error. The caller is responsible
      for decref'ing the returned objects.

     */
    PyObject* PyUnicode_FromObject(
            PyObject* obj);

    /** Create a Unicode Object from the wchar_t buffer w of the given
      size.

      The buffer is copied into the new object.
*/
    PyObject* PyUnicode_FromWideChar(
            const(wchar)* w,
            Py_ssize_t size);

    /** Copies the contents of the Unicode Object into the wchar_t buffer w.
      No more than size wchar_t characters are copied.

      The resulting wchar_t string may or may not be 0-terminated; if the
      application requires 0-termination, the caller is responsible for
      making sure the wchar_t string is 0-terminated.

      Returns the number of wchar_t characters copied (excluding a
      possibly trailing 0-termination character) or -1 in case of an
      error.

      NOTE(review): w is described as the buffer being written to, yet it
      is declared const(wchar)* here - confirm whether the const is
      intended.
     */
    Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject* unicode, const(wchar)* w, Py_ssize_t size);

    /** Create a Unicode Object from the given Unicode code point ordinal.

      On narrow Python builds (UCS2) the ordinal must be in
      range(0x10000); on wide builds (UCS4) it must be in
      range(0x110000). A ValueError is raised in case it is not.

     */
    PyObject* PyUnicode_FromOrdinal(
            int ordinal);

    /** Return a Python string holding the default encoded value of the
      Unicode object.

      The resulting string is cached in the Unicode object for subsequent
      usage by this function. The cached version is needed to implement
      the character buffer interface and will live (at least) as long as
      the Unicode object itself.

      The refcount of the string is *not* incremented.

      _*** Exported for internal use by the interpreter only !!! ***

     */
    PyObject* _PyUnicode_AsDefaultEncodedString(
            PyObject*,
            const(char)*);

    /** Returns the currently active default encoding.

      The default encoding is currently implemented as run-time settable
      process global. This may change in future versions of the
      interpreter to become a parameter which is managed on a per-thread
      basis.
*/
    const(char)* PyUnicode_GetDefaultEncoding();

    /** Sets the currently active default encoding.

      The result is 0 on success and -1 in case of an error.
      Params:
      encoding = the encoding to make the default
     */
    int PyUnicode_SetDefaultEncoding(
            const(char)* encoding);

    /** Create a Unicode object by decoding the encoded string s of the
      given size.
      Params:
      s = encoded string
      size = size of buffer
      encoding = encoding
      errors = error handling
     */
    PyObject* PyUnicode_Decode(const(char)* s, Py_ssize_t size, const(char)* encoding, const(char)* errors);

    version(Python_3_0_Or_Later) {
        /** Decode a Unicode object unicode and return the result as Python
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsDecodedObject(PyObject* unicode, const(char)* encoding, const(char)* errors);

        /** Decode a Unicode object unicode and return the result as Unicode
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsDecodedUnicode(PyObject* unicode, const(char)* encoding, const(char)* errors);

    }

    /** Encodes a Py_UNICODE buffer of the given size and returns a
      Python string object.
      Params:
      s = Unicode char buffer
      size = number of Py_UNICODE chars to encode
      encoding = encoding
      errors = error handling
     */
    PyObject* PyUnicode_Encode(Py_UNICODE* s, Py_ssize_t size, const(char)* encoding, const(char)* errors);

    /** Encodes a Unicode object and returns the result as Python object.
     */
    PyObject* PyUnicode_AsEncodedObject(PyObject* unicode, const(char)* encoding, const(char)* errors);

    /** Encodes a Unicode object and returns the result as Python string
      object.
*/
    PyObject* PyUnicode_AsEncodedString(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);

    version(Python_3_0_Or_Later) {
        /** Encodes a Unicode object and returns the result as Unicode
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsEncodedUnicode(
                PyObject* unicode,
                const(char)* encoding,
                const(char)* errors
                );

    }

    /**
    Params:
    string = UTF-7 encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeUTF7(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    encodeSetO = Encode RFC2152 Set O characters in base64
    encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
    errors = error handling
    */
    PyObject* PyUnicode_EncodeUTF7(
            Py_UNICODE* data,
            Py_ssize_t length,
            int encodeSetO,
            int encodeWhiteSpace,
            const(char)* errors
            );

    /**
    Params:
    string = UTF-8 encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeUTF8(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
    Params:
    string = UTF-8 encoded string
    length = size of string
    errors = error handling
    consumed = bytes consumed
    */
    PyObject* PyUnicode_DecodeUTF8Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            Py_ssize_t* consumed
            );

    /// _
    PyObject* PyUnicode_AsUTF8String(PyObject* unicode);

    /**
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    errors = error handling
    */
    PyObject* PyUnicode_EncodeUTF8(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);

    /** Decodes length bytes from a UTF-16 encoded buffer string and returns
      the corresponding Unicode object.

      errors (if non-NULL) defines the error handling. It defaults
      to "strict".
If byteorder is non-NULL, the decoder starts decoding using the
      byte order it points to:

      byteorder[0] == -1: little endian
      byteorder[0] == 0:  native order
      byteorder[0] == 1:  big endian

      In native mode the first two bytes of the stream are checked for a
      BOM mark. If one is found, it is analysed, the byte order is
      adjusted and the BOM is skipped. The other modes do no BOM mark
      interpretation. After completion, the value byteorder points to is
      set to the current byte order at the end of input data.

      If byteorder is NULL, the codec starts in native order mode.

     */
    PyObject* PyUnicode_DecodeUTF16(const(char)* string, Py_ssize_t length, const(char)* errors, int* byteorder);

    /**
    Params:
    string = UTF-16 encoded string
    length = size of string
    errors = error handling
    byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
    consumed = bytes consumed
    */
    PyObject* PyUnicode_DecodeUTF16Stateful(const(char)* string, Py_ssize_t length, const(char)* errors, int* byteorder, Py_ssize_t* consumed);

    /** Returns a Python string using the UTF-16 encoding in native byte
      order. The string always starts with a BOM mark. */
    PyObject* PyUnicode_AsUTF16String(
            PyObject* unicode);

    /** Returns a Python string object holding the UTF-16 encoded value of
      the Unicode data.

      If byteorder is not 0, output is written according to the following
      byte order:

      byteorder == -1: little endian
      byteorder == 0:  native byte order (writes a BOM mark)
      byteorder == 1:  big endian

      With byteorder 0 the output string always starts with the Unicode
      BOM mark (U+FEFF); in the other two modes no BOM mark is prepended.

      Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
      UCS-2.
This trick makes it possible to add full UTF-16 capabilities
      at a later point without compromising the APIs.

     */
    PyObject* PyUnicode_EncodeUTF16(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors,
            int byteorder
            );

    /// _
    PyObject* PyUnicode_DecodeUnicodeEscape(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsUnicodeEscapeString(
            PyObject* unicode);

    /// _
    PyObject* PyUnicode_EncodeUnicodeEscape(
            Py_UNICODE* data,
            Py_ssize_t length);

    /**
    Params:
    string = Raw-Unicode-Escape encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeRawUnicodeEscape(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode);

    /// _
    PyObject* PyUnicode_EncodeRawUnicodeEscape(
            Py_UNICODE* data, Py_ssize_t length);

    /// _
    PyObject* _PyUnicode_DecodeUnicodeInternal(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
    Params:
    string = Latin-1 encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeLatin1(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsLatin1String(PyObject* unicode);

    /**
    Params:
    data = Unicode char buffer
    length = Number of Py_UNICODE chars to encode
    errors = error handling
    */
    PyObject* PyUnicode_EncodeLatin1(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);

    /**
    Params:
    string = ASCII encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeASCII(
const(char)* string, Py_ssize_t length, const(char)* errors);

    /// _
    PyObject* PyUnicode_AsASCIIString(
            PyObject* unicode);

    /**
    Params:
    data = Unicode char buffer
    length = Number of Py_UNICODE chars to encode
    errors = error handling
    */
    PyObject* PyUnicode_EncodeASCII(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

    /**
    Params:
    string = Encoded string
    length = size of string
    mapping = character mapping (char ordinal -> unicode ordinal)
    errors = error handling
    */
    PyObject* PyUnicode_DecodeCharmap(const(char)* string, Py_ssize_t length, PyObject* mapping, const(char)* errors);

    /**
    Params:
    unicode = Unicode object
    mapping = character mapping (unicode ordinal -> char ordinal)
    */
    PyObject* PyUnicode_AsCharmapString(PyObject* unicode, PyObject* mapping);

    /**
    Params:
    data = Unicode char buffer
    length = Number of Py_UNICODE chars to encode
    mapping = character mapping (unicode ordinal -> char ordinal)
    errors = error handling
    */
    PyObject* PyUnicode_EncodeCharmap(Py_UNICODE* data, Py_ssize_t length, PyObject* mapping, const(char)* errors);

    /** Translate a Py_UNICODE buffer of the given length by applying a
      character mapping table to it and return the resulting Unicode
      object.

      The mapping table must map Unicode ordinal integers to Unicode
      ordinal integers or None (causing deletion of the character).

      Mapping tables may be dictionaries or sequences. Unmapped character
      ordinals (ones which cause a LookupError) are left untouched and
      are copied as-is.
*/
    PyObject* PyUnicode_TranslateCharmap(Py_UNICODE* data, Py_ssize_t length, PyObject* table, const(char)* errors);

    version (Windows) {
        /// Availability: Windows only
        PyObject* PyUnicode_DecodeMBCS(const(char)* string, Py_ssize_t length, const(char)* errors);

        /// Availability: Windows only
        PyObject* PyUnicode_AsMBCSString(
                PyObject* unicode);

        /// Availability: Windows only
        PyObject* PyUnicode_EncodeMBCS(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

    }

    /** Takes a Unicode string holding a decimal value and writes it into
      an output buffer using standard ASCII digit codes.

      The output buffer must provide at least length+1 bytes of storage
      area; the output string is 0-terminated.

      Whitespace is converted to ' ', decimal characters to their
      corresponding ASCII digit, and all other Latin-1 characters except
      \0 are copied as-is. Characters outside this range (Unicode
      ordinals 1-256) are treated as errors. This includes embedded NULL
      bytes.

      The errors argument selects the error handling:

      NULL or "strict": raise a ValueError
      "ignore": ignore the wrong characters (these are not copied to the
                output buffer)
      "replace": replaces illegal characters with '?'

      The result is 0 on success and -1 on failure.

     */
    int PyUnicode_EncodeDecimal(Py_UNICODE* s, Py_ssize_t length, char* output, const(char)* errors);

    /** Concat two strings giving a new Unicode string.
*/
    PyObject* PyUnicode_Concat(
            PyObject* left,
            PyObject* right);

    version(Python_3_0_Or_Later) {
        /** Concat two strings and put the result in *pleft
          (sets *pleft to NULL on error)
          Params:
          pleft = Pointer to left string
          right = Right string
         */
        /// Availability: 3.*
        void PyUnicode_Append(
                PyObject** pleft,
                PyObject* right
                );

        /** Concat two strings, put the result in *pleft and drop the right object
          (sets *pleft to NULL on error)
          Params:
          pleft = Pointer to left string
          right = Right string; dropped by this call
         */
        /// Availability: 3.*
        void PyUnicode_AppendAndDel(
                PyObject** pleft,
                PyObject* right
                );

    }

    /** Split a string giving a list of Unicode strings.

      If sep is NULL, splitting will be done at all whitespace
      substrings. Otherwise, splits occur at the given separator.

      At most maxsplit splits will be done. If negative, no limit is set.

      Separators are not included in the resulting list.

     */
    PyObject* PyUnicode_Split(
            PyObject* s,
            PyObject* sep,
            Py_ssize_t maxsplit);

    /** Ditto PyUnicode_Split, but split at line breaks.

      CRLF is considered to be one line break. Line breaks are not
      included in the resulting list unless keepends is non-zero.
      Params:
      s = string to split
      keepends = if 0, line break characters are dropped from the results
     */
    PyObject* PyUnicode_Splitlines(
            PyObject* s,
            int keepends);

    version(Python_2_5_Or_Later) {
        /** Partition a string using a given separator. */
        /// Availability: >= 2.5
        PyObject* PyUnicode_Partition(
                PyObject* s,
                PyObject* sep
                );

        /** Partition a string using a given separator, searching from the end
          of the string. */
        /// Availability: >= 2.5
        PyObject* PyUnicode_RPartition(
                PyObject* s,
                PyObject* sep
                );

    }

    /** Split a string giving a list of Unicode strings.

      If sep is NULL, splitting will be done at all whitespace
      substrings.
Otherwise, splits occur at the given separator.

      No more than maxsplit splits will be done, but unlike
      PyUnicode_Split, PyUnicode_RSplit splits from the end of the
      string. If maxsplit is negative, no limit is set.

      Separators are not included in the resulting list.

     */
    PyObject* PyUnicode_RSplit(PyObject* s, PyObject* sep, Py_ssize_t maxsplit);

    /** Translate a string by applying a character mapping table to it and
      return the resulting Unicode object.

      The table must map Unicode ordinal integers to Unicode ordinal
      integers or None (causing deletion of the character).

      Mapping tables may be dictionaries or sequences. Character ordinals
      that are not mapped (ones which cause a LookupError) are left
      untouched and are copied as-is.

     */
    PyObject* PyUnicode_Translate(PyObject* str, PyObject* table, const(char)* errors);

    /** Join a sequence of strings using the given separator and return
      the resulting Unicode string. */
    PyObject* PyUnicode_Join(PyObject* separator, PyObject* seq);

    /** Return 1 if substr matches str[start:end] at the given tail end, 0
      otherwise. */
    Py_ssize_t PyUnicode_Tailmatch(PyObject* str, PyObject* substr, Py_ssize_t start, Py_ssize_t end, int direction);

    /** Return the first position of substr in str[start:end] using the
      given search direction, or -1 if not found. If an error occurred,
      -2 is returned and an exception is set. */
    Py_ssize_t PyUnicode_Find(PyObject* str, PyObject* substr, Py_ssize_t start, Py_ssize_t end, int direction);

    /** Count the number of occurrences of substr in str[start:end].
*/
    Py_ssize_t PyUnicode_Count(
            PyObject* str,
            PyObject* substr,
            Py_ssize_t start,
            Py_ssize_t end);

    /** Replace at most maxcount occurrences of substr in str with replstr
      and return the resulting Unicode object. A maxcount of -1 means
      replace all occurrences. */
    PyObject* PyUnicode_Replace(
            PyObject* str,
            PyObject* substr,
            PyObject* replstr,
            Py_ssize_t maxcount
            );

    /** Compare two strings and return -1, 0, 1 for less than, equal,
      greater than resp. */
    int PyUnicode_Compare(PyObject* left, PyObject* right);

    version(Python_3_0_Or_Later) {
        /** Compare two strings and return -1, 0, 1 for less than, equal,
          greater than resp.
          Params:
          left = Unicode object
          right = ASCII-encoded string
         */
        /// Availability: 3.*
        int PyUnicode_CompareWithASCIIString(
                PyObject* left,
                const(char)* right
                );
    }

    version(Python_2_5_Or_Later) {
        /** Rich compare two strings and return one of the following:

          - NULL in case an exception was raised
          - Py_True or Py_False for successful comparisons
          - Py_NotImplemented in case the type combination is unknown

          Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
          case the conversion of the arguments to Unicode fails with a
          UnicodeDecodeError.

          Possible values for op:

          Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE

         */
        /// Availability: >= 2.5
        PyObject* PyUnicode_RichCompare(
                PyObject* left,
                PyObject* right,
                int op
                );
    }

    /** Apply an argument tuple or dictionary to a format string and return
      the resulting Unicode string. */
    PyObject* PyUnicode_Format(PyObject* format, PyObject* args);

    /** Checks whether element is contained in container and return 1/0
      accordingly.

      element has to coerce to an one element Unicode string. -1 is
      returned in case of an error.
*/
    int PyUnicode_Contains(
            PyObject* container,
            PyObject* element);

    version(Python_3_0_Or_Later) {
        /** Checks whether argument is a valid identifier. */
        /// Availability: 3.*
        int PyUnicode_IsIdentifier(
                PyObject* s);
    }

    // Character-level helpers; each takes a single Py_UNICODE code unit.

    /// _
    int _PyUnicode_IsLowercase(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsUppercase(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsTitlecase(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsWhitespace(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsLinebreak(Py_UNICODE ch);
    /// _
    Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch);
    /// _
    Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch);
    /// _
    Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch);
    /// _
    int _PyUnicode_ToDecimalDigit(Py_UNICODE ch);
    /// _
    int _PyUnicode_ToDigit(Py_UNICODE ch);
    /// _
    double _PyUnicode_ToNumeric(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsDecimalDigit(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsDigit(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsNumeric(Py_UNICODE ch);
    /// _
    int _PyUnicode_IsAlpha(Py_UNICODE ch);

}else version(Python_Unicode_UCS2) {

    version(Python_2_6_Or_Later) {

    /** Create a Unicode Object from the Py_UNICODE buffer u of the given
      size.

      u may be NULL which causes the contents to be undefined. It is the
      user's responsibility to fill in the needed data afterwards. Note
      that modifying the Unicode object contents after construction is
      only allowed if u was set to NULL.

      The buffer is copied into the new object.
*/ 2108 /// Availability: >= 2.6 2109 PyObject* PyUnicodeUCS2_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 2110 /// ditto 2111 2112 alias PyUnicodeUCS2_FromUnicode PyUnicode_FromUnicode; 2113 2114 2115 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 2116 /// Availability: >= 2.6 2117 PyObject* PyUnicodeUCS2_FromStringAndSize( 2118 const(char)*u, /* char buffer */ 2119 Py_ssize_t size /* size of buffer */ 2120 ); 2121 /// ditto 2122 2123 alias PyUnicodeUCS2_FromStringAndSize PyUnicode_FromStringAndSize; 2124 2125 2126 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 2127 Latin-1 encoded bytes */ 2128 /// Availability: >= 2.6 2129 PyObject* PyUnicodeUCS2_FromString( 2130 const(char)*u /* string */ 2131 ); 2132 /// ditto 2133 2134 alias PyUnicodeUCS2_FromString PyUnicode_FromString; 2135 2136 /// Availability: >= 2.6 2137 PyObject* PyUnicodeUCS2_FromFormatV(const(char)*, va_list); 2138 /// ditto 2139 2140 alias PyUnicodeUCS2_FromFormatV PyUnicode_FromFormatV; 2141 2142 /// Availability: >= 2.6 2143 PyObject* PyUnicodeUCS2_FromFormat(const(char)*, ...); 2144 /// ditto 2145 2146 alias PyUnicodeUCS2_FromFormat PyUnicode_FromFormat; 2147 2148 2149 /** Format the object based on the format_spec, as defined in PEP 3101 2150 (Advanced String Formatting). 
*/ 2151 /// Availability: >= 2.6 2152 PyObject* _PyUnicodeUCS2_FormatAdvanced(PyObject *obj, 2153 Py_UNICODE *format_spec, 2154 Py_ssize_t format_spec_len); 2155 /// ditto 2156 2157 alias _PyUnicodeUCS2_FormatAdvanced _PyUnicode_FormatAdvanced; 2158 2159 /// Availability: >= 2.6 2160 int PyUnicodeUCS2_ClearFreeList(); 2161 /// ditto 2162 2163 alias PyUnicodeUCS2_ClearFreeList PyUnicode_ClearFreeList; 2164 2165 /** 2166 Params: 2167 string = UTF-7 encoded string 2168 length = size of string 2169 error = error handling 2170 consumed = bytes consumed 2171 */ 2172 /// Availability: >= 2.6 2173 PyObject* PyUnicodeUCS2_DecodeUTF7Stateful( 2174 const(char)* string, 2175 Py_ssize_t length, 2176 const(char)*errors, 2177 Py_ssize_t *consumed 2178 ); 2179 /// ditto 2180 2181 alias PyUnicodeUCS2_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful; 2182 2183 /** 2184 Params: 2185 string = UTF-32 encoded string 2186 length = size of string 2187 error = error handling 2188 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 2189 */ 2190 /// Availability: >= 2.6 2191 PyObject* PyUnicodeUCS2_DecodeUTF32( 2192 const(char)* string, 2193 Py_ssize_t length, 2194 const(char)*errors, 2195 int *byteorder 2196 ); 2197 /// ditto 2198 2199 alias PyUnicodeUCS2_DecodeUTF32 PyUnicode_DecodeUTF32; 2200 2201 2202 /** 2203 Params: 2204 string = UTF-32 encoded string 2205 length = size of string 2206 error = error handling 2207 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 2208 */ 2209 /// Availability: >= 2.6 2210 PyObject* PyUnicodeUCS2_DecodeUTF32Stateful( 2211 const(char)*string, 2212 Py_ssize_t length, 2213 const(char)*errors, 2214 int *byteorder, 2215 Py_ssize_t *consumed 2216 ); 2217 /// ditto 2218 2219 alias PyUnicodeUCS2_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful; 2220 2221 /** Returns a Python string using the UTF-32 encoding in native byte 2222 order. The string always starts with a BOM mark. 
*/ 2223 /// Availability: >= 2.6 2224 2225 PyObject* PyUnicodeUCS2_AsUTF32String( 2226 PyObject *unicode 2227 ); 2228 /// ditto 2229 2230 alias PyUnicodeUCS2_AsUTF32String PyUnicode_AsUTF32String; 2231 2232 2233 /** Returns a Python string object holding the UTF-32 encoded value of 2234 the Unicode data. 2235 2236 If byteorder is not 0, output is written according to the following 2237 byte order: 2238 2239 byteorder == -1: little endian 2240 byteorder == 0: native byte order (writes a BOM mark) 2241 byteorder == 1: big endian 2242 2243 If byteorder is 0, the output string will always start with the 2244 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 2245 prepended. 2246 Params: 2247 data = Unicode char buffer 2248 length = number of Py_UNICODE chars to encode 2249 errors = error handling 2250 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 2251 2252 */ 2253 /// Availability: >= 2.6 2254 PyObject* PyUnicodeUCS2_EncodeUTF32( 2255 const Py_UNICODE *data, 2256 Py_ssize_t length, 2257 const(char)* errors, 2258 int byteorder 2259 ); 2260 /// ditto 2261 2262 alias PyUnicodeUCS2_EncodeUTF32 PyUnicode_EncodeUTF32; 2263 2264 } 2265 2266 /** Return a read-only pointer to the Unicode object's internal 2267 Py_UNICODE buffer. */ 2268 Py_UNICODE* PyUnicodeUCS2_AsUnicode(PyObject* unicode); 2269 /// ditto 2270 2271 alias PyUnicodeUCS2_AsUnicode PyUnicode_AsUnicode; 2272 2273 /** Get the length of the Unicode object. */ 2274 Py_ssize_t PyUnicodeUCS2_GetSize(PyObject* unicode); 2275 /// ditto 2276 2277 alias PyUnicodeUCS2_GetSize PyUnicode_GetSize; 2278 2279 2280 /** Get the maximum ordinal for a Unicode character. */ 2281 Py_UNICODE PyUnicodeUCS2_GetMax(); 2282 /// ditto 2283 2284 alias PyUnicodeUCS2_GetMax PyUnicode_GetMax; 2285 2286 2287 /** Resize an already allocated Unicode object to the new size length. 2288 2289 _*unicode is modified to point to the new (resized) object and 0 2290 returned on success. 
2291 2292 This API may only be called by the function which also called the 2293 Unicode constructor. The refcount on the object must be 1. Otherwise, 2294 an error is returned. 2295 2296 Error handling is implemented as follows: an exception is set, -1 2297 is returned and *unicode left untouched. 2298 Params: 2299 unicode = pointer to the new unicode object. 2300 length = New length. 2301 2302 */ 2303 int PyUnicodeUCS2_Resize(PyObject** unicode, Py_ssize_t length); 2304 /// ditto 2305 2306 alias PyUnicodeUCS2_Resize PyUnicode_Resize; 2307 2308 /** Coerce obj to an Unicode object and return a reference with 2309 _*incremented* refcount. 2310 2311 Coercion is done in the following way: 2312 2313 1. String and other char buffer compatible objects are decoded 2314 under the assumptions that they contain data using the current 2315 default encoding. Decoding is done in "strict" mode. 2316 2317 2. All other objects (including Unicode objects) raise an 2318 exception. 2319 2320 The API returns NULL in case of an error. The caller is responsible 2321 for decref'ing the returned objects. 2322 2323 */ 2324 PyObject* PyUnicodeUCS2_FromEncodedObject( 2325 PyObject* obj, 2326 const(char)* encoding, 2327 const(char)* errors); 2328 /// ditto 2329 2330 alias PyUnicodeUCS2_FromEncodedObject PyUnicode_FromEncodedObject; 2331 2332 2333 /** Coerce obj to an Unicode object and return a reference with 2334 _*incremented* refcount. 2335 2336 Unicode objects are passed back as-is (subclasses are converted to 2337 true Unicode objects), all other objects are delegated to 2338 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 2339 using the default encoding as basis for decoding the object. 2340 2341 The API returns NULL in case of an error. The caller is responsible 2342 for decref'ing the returned objects. 
2343 2344 */ 2345 PyObject* PyUnicodeUCS2_FromObject(PyObject* obj); 2346 /// ditto 2347 2348 alias PyUnicodeUCS2_FromObject PyUnicode_FromObject; 2349 2350 2351 /** Create a Unicode Object from the wchar_t buffer w of the given 2352 size. 2353 2354 The buffer is copied into the new object. */ 2355 PyObject* PyUnicodeUCS2_FromWideChar(const(wchar)* w, Py_ssize_t size); 2356 /// ditto 2357 2358 alias PyUnicodeUCS2_FromWideChar PyUnicode_FromWideChar; 2359 2360 2361 /** Copies the Unicode Object contents into the wchar_t buffer w. At 2362 most size wchar_t characters are copied. 2363 2364 Note that the resulting wchar_t string may or may not be 2365 0-terminated. It is the responsibility of the caller to make sure 2366 that the wchar_t string is 0-terminated in case this is required by 2367 the application. 2368 2369 Returns the number of wchar_t characters copied (excluding a 2370 possibly trailing 0-termination character) or -1 in case of an 2371 error. NOTE(review): w is declared const(wchar)* below although it is described here as the destination buffer - confirm against the C header. */ 2372 Py_ssize_t PyUnicodeUCS2_AsWideChar( 2373 PyUnicodeObject* unicode, 2374 const(wchar)* w, 2375 Py_ssize_t size); 2376 /// ditto 2377 2378 alias PyUnicodeUCS2_AsWideChar PyUnicode_AsWideChar; 2379 2380 2381 /** Create a Unicode Object from the given Unicode code point ordinal. 2382 2383 The ordinal must be in range(0x10000) on narrow Python builds 2384 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 2385 raised in case it is not. 2386 2387 */ 2388 PyObject* PyUnicodeUCS2_FromOrdinal(int ordinal); 2389 /// ditto 2390 2391 alias PyUnicodeUCS2_FromOrdinal PyUnicode_FromOrdinal; 2392 2393 2394 /** Return a Python string holding the default encoded value of the 2395 Unicode object. 2396 2397 The resulting string is cached in the Unicode object for subsequent 2398 usage by this function. The cached version is needed to implement 2399 the character buffer interface and will live (at least) as long as 2400 the Unicode object itself. 2401 2402 The refcount of the string is *not* incremented.
2403 2404 _*** Exported for internal use by the interpreter only !!! *** 2405 2406 */ 2407 PyObject* _PyUnicodeUCS2_AsDefaultEncodedString(PyObject *, const(char)*); 2408 /// ditto 2409 2410 alias _PyUnicodeUCS2_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 2411 2412 2413 /** Returns the currently active default encoding. 2414 2415 The default encoding is currently implemented as run-time settable 2416 process global. This may change in future versions of the 2417 interpreter to become a parameter which is managed on a per-thread 2418 basis. 2419 2420 */ 2421 const(char)* PyUnicodeUCS2_GetDefaultEncoding(); 2422 /// ditto 2423 2424 alias PyUnicodeUCS2_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 2425 2426 2427 /** Sets the currently active default encoding. 2428 2429 Returns 0 on success, -1 in case of an error. 2430 2431 */ 2432 int PyUnicodeUCS2_SetDefaultEncoding(const(char)*encoding); 2433 /// ditto 2434 2435 alias PyUnicodeUCS2_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 2436 2437 2438 /** Create a Unicode object by decoding the encoded string s of the 2439 given size. 2440 Params: 2441 s = encoded string 2442 size = size of buffer 2443 encoding = encoding 2444 errors = error handling 2445 */ 2446 PyObject* PyUnicodeUCS2_Decode( 2447 const(char)* s, 2448 Py_ssize_t size, 2449 const(char)* encoding, 2450 const(char)* errors); 2451 /// ditto 2452 2453 alias PyUnicodeUCS2_Decode PyUnicode_Decode; 2454 2455 2456 version(Python_3_0_Or_Later) { 2457 /** Decode a Unicode object unicode and return the result as Python 2458 object. */ 2459 /// Availability: 3.* 2460 2461 PyObject* PyUnicodeUCS2_AsDecodedObject( 2462 PyObject* unicode, 2463 const(char)* encoding, 2464 const(char)* errors 2465 ); 2466 /// ditto 2467 2468 alias PyUnicodeUCS2_AsDecodedObject PyUnicode_AsDecodedObject; 2469 2470 /** Decode a Unicode object unicode and return the result as Unicode 2471 object. 
*/ 2472 /// Availability: 3.* 2473 2474 PyObject* PyUnicodeUCS2_AsDecodedUnicode( 2475 PyObject* unicode, 2476 const(char)* encoding, 2477 const(char)* errors 2478 ); 2479 /// ditto 2480 2481 alias PyUnicodeUCS2_AsDecodedUnicode PyUnicode_AsDecodedUnicode; 2482 2483 } 2484 2485 /** Encodes a Py_UNICODE buffer of the given size and returns a 2486 Python string object. 2487 Params: 2488 s = Unicode char buffer 2489 size = number of Py_UNICODE chars to encode 2490 encoding = encoding 2491 errors = error handling 2492 */ 2493 PyObject* PyUnicodeUCS2_Encode( 2494 Py_UNICODE* s, 2495 Py_ssize_t size, 2496 const(char)* encoding, 2497 const(char)* errors); 2498 /// ditto 2499 2500 alias PyUnicodeUCS2_Encode PyUnicode_Encode; 2501 2502 2503 /** Encodes a Unicode object and returns the result as Python object. 2504 */ 2505 PyObject* PyUnicodeUCS2_AsEncodedObject( 2506 PyObject* unicode, 2507 const(char)* encoding, 2508 const(char)* errors); 2509 /// ditto 2510 2511 alias PyUnicodeUCS2_AsEncodedObject PyUnicode_AsEncodedObject; 2512 2513 2514 /** Encodes a Unicode object and returns the result as Python string 2515 object. */ 2516 PyObject* PyUnicodeUCS2_AsEncodedString( 2517 PyObject* unicode, 2518 const(char)* encoding, 2519 const(char)* errors); 2520 /// ditto 2521 2522 alias PyUnicodeUCS2_AsEncodedString PyUnicode_AsEncodedString; 2523 2524 2525 version(Python_3_0_Or_Later) { 2526 /** Encodes a Unicode object and returns the result as Unicode 2527 object. 
*/ 2528 /// Availability: 3.* 2529 PyObject* PyUnicodeUCS2_AsEncodedUnicode( 2530 PyObject* unicode, 2531 const(char)* encoding, 2532 const(char)* errors 2533 ); 2534 /// ditto 2535 2536 alias PyUnicodeUCS2_AsEncodedUnicode PyUnicode_AsEncodedUnicode; 2537 2538 } 2539 2540 /** 2541 Params: 2542 string = UTF-7 encoded string 2543 length = size of string 2544 errors = error handling 2545 */ 2546 PyObject* PyUnicodeUCS2_DecodeUTF7( 2547 const(char)* string, 2548 Py_ssize_t length, 2549 const(char)* errors); 2550 /// ditto 2551 2552 alias PyUnicodeUCS2_DecodeUTF7 PyUnicode_DecodeUTF7; 2553 2554 2555 /** 2556 Params: 2557 data = Unicode char buffer 2558 length = number of Py_UNICODE chars to encode 2559 encodeSetO = Encode RFC2152 Set O characters in base64 2560 encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 2561 errors = error handling 2562 */ 2563 PyObject* PyUnicodeUCS2_EncodeUTF7( 2564 Py_UNICODE* data, 2565 Py_ssize_t length, 2566 int encodeSetO, 2567 int encodeWhiteSpace, 2568 const(char)* errors 2569 ); 2570 /// ditto 2571 2572 alias PyUnicodeUCS2_EncodeUTF7 PyUnicode_EncodeUTF7; 2573 2574 2575 /// _ 2576 PyObject* PyUnicodeUCS2_DecodeUTF8( 2577 const(char)* string, 2578 Py_ssize_t length, 2579 const(char)* errors); 2580 /// ditto 2581 2582 alias PyUnicodeUCS2_DecodeUTF8 PyUnicode_DecodeUTF8; 2583 2584 /// _ 2585 PyObject* PyUnicodeUCS2_DecodeUTF8Stateful( 2586 const(char)* string, 2587 Py_ssize_t length, 2588 const(char)* errors, 2589 Py_ssize_t* consumed 2590 ); 2591 /// ditto 2592 2593 alias PyUnicodeUCS2_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful; 2594 2595 /// _ 2596 PyObject* PyUnicodeUCS2_AsUTF8String(PyObject* unicode); 2597 /// ditto 2598 2599 alias PyUnicodeUCS2_AsUTF8String PyUnicode_AsUTF8String; 2600 2601 /// _ 2602 PyObject* PyUnicodeUCS2_EncodeUTF8( 2603 Py_UNICODE* data, 2604 Py_ssize_t length, 2605 const(char) *errors); 2606 /// ditto 2607 2608 alias PyUnicodeUCS2_EncodeUTF8 PyUnicode_EncodeUTF8; 2609 2610 2611 /**
Decodes length bytes from a UTF-16 encoded buffer string and returns 2612 the corresponding Unicode object. 2613 2614 errors (if non-NULL) defines the error handling. It defaults 2615 to "strict". 2616 2617 If byteorder is non-NULL, the decoder starts decoding using the 2618 given byte order: 2619 2620 *byteorder == -1: little endian 2621 *byteorder == 0: native order 2622 *byteorder == 1: big endian 2623 2624 In native mode, the first two bytes of the stream are checked for a 2625 BOM mark. If found, the BOM mark is analysed, the byte order 2626 adjusted and the BOM skipped. In the other modes, no BOM mark 2627 interpretation is done. After completion, *byteorder is set to the 2628 current byte order at the end of input data. 2629 2630 If byteorder is NULL, the codec starts in native order mode. 2631 2632 */ 2633 PyObject* PyUnicodeUCS2_DecodeUTF16( 2634 const(char)* string, 2635 Py_ssize_t length, 2636 const(char)* errors, 2637 int* byteorder); 2638 /// ditto 2639 2640 alias PyUnicodeUCS2_DecodeUTF16 PyUnicode_DecodeUTF16; 2641 2642 /** 2643 Params: 2644 string = UTF-16 encoded string 2645 length = size of string 2646 errors = error handling 2647 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 2648 consumed = bytes consumed 2649 */ 2650 PyObject* PyUnicodeUCS2_DecodeUTF16Stateful( 2651 const(char)* string, 2652 Py_ssize_t length, 2653 const(char)* errors, 2654 int* byteorder, 2655 Py_ssize_t* consumed 2656 ); 2657 /// ditto 2658 2659 alias PyUnicodeUCS2_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful; 2660 2661 /** Returns a Python string using the UTF-16 encoding in native byte 2662 order. The string always starts with a BOM mark. */ 2663 PyObject* PyUnicodeUCS2_AsUTF16String(PyObject *unicode); 2664 /// ditto 2665 2666 alias PyUnicodeUCS2_AsUTF16String PyUnicode_AsUTF16String; 2667 2668 /** Returns a Python string object holding the UTF-16 encoded value of 2669 the Unicode data. 
2670 2671 If byteorder is not 0, output is written according to the following 2672 byte order: 2673 2674 byteorder == -1: little endian 2675 byteorder == 0: native byte order (writes a BOM mark) 2676 byteorder == 1: big endian 2677 2678 If byteorder is 0, the output string will always start with the 2679 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 2680 prepended. 2681 2682 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 2683 UCS-2. This trick makes it possible to add full UTF-16 capabilities 2684 at a later point without compromising the APIs. 2685 2686 */ 2687 PyObject* PyUnicodeUCS2_EncodeUTF16( 2688 Py_UNICODE* data, 2689 Py_ssize_t length, 2690 const(char)* errors, 2691 int byteorder 2692 ); 2693 /// ditto 2694 2695 alias PyUnicodeUCS2_EncodeUTF16 PyUnicode_EncodeUTF16; 2696 2697 2698 /// _ 2699 PyObject* PyUnicodeUCS2_DecodeUnicodeEscape( 2700 const(char)* string, 2701 Py_ssize_t length, 2702 const(char)* errors); 2703 /// ditto 2704 2705 alias PyUnicodeUCS2_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 2706 2707 /// _ 2708 PyObject* PyUnicodeUCS2_AsUnicodeEscapeString( 2709 PyObject* unicode); 2710 /// ditto 2711 2712 alias PyUnicodeUCS2_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 2713 2714 /// _ 2715 PyObject* PyUnicodeUCS2_EncodeUnicodeEscape( 2716 Py_UNICODE* data, 2717 Py_ssize_t length); 2718 /// ditto 2719 2720 alias PyUnicodeUCS2_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 2721 2722 /** 2723 Params: 2724 string = Raw-Unicode-Escape encoded string 2725 length = size of string 2726 errors = error handling 2727 */ 2728 PyObject* PyUnicodeUCS2_DecodeRawUnicodeEscape( 2729 const(char)* string, 2730 Py_ssize_t length, 2731 const(char)* errors); 2732 /// ditto 2733 2734 alias PyUnicodeUCS2_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 2735 2736 /// _ 2737 PyObject* PyUnicodeUCS2_AsRawUnicodeEscapeString(PyObject* unicode); 2738 /// ditto 2739 2740 alias 
PyUnicodeUCS2_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 2741 2742 /// _ 2743 PyObject* PyUnicodeUCS2_EncodeRawUnicodeEscape( 2744 Py_UNICODE* data, Py_ssize_t length); 2745 /// ditto 2746 2747 alias PyUnicodeUCS2_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 2748 2749 2750 /// _ 2751 PyObject* _PyUnicodeUCS2_DecodeUnicodeInternal( 2752 const(char)* string, 2753 Py_ssize_t length, 2754 const(char)* errors); 2755 /// ditto 2756 2757 alias _PyUnicodeUCS2_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 2758 2759 2760 /** 2761 Params: 2762 string = Latin-1 encoded string 2763 length = size of string 2764 errors = error handling 2765 */ 2766 PyObject* PyUnicodeUCS2_DecodeLatin1( 2767 const(char)* string, 2768 Py_ssize_t length, 2769 const(char)* errors); 2770 /// ditto 2771 2772 alias PyUnicodeUCS2_DecodeLatin1 PyUnicode_DecodeLatin1; 2773 2774 /// _ 2775 PyObject* PyUnicodeUCS2_AsLatin1String(PyObject *unicode); 2776 /// ditto 2777 2778 alias PyUnicodeUCS2_AsLatin1String PyUnicode_AsLatin1String; 2779 2780 /** 2781 Params: 2782 data = Unicode char buffer 2783 length = Number of Py_UNICODE chars to encode 2784 errors = error handling 2785 */ 2786 PyObject* PyUnicodeUCS2_EncodeLatin1( 2787 Py_UNICODE* data, 2788 Py_ssize_t length, 2789 const(char)* errors); 2790 /// ditto 2791 2792 alias PyUnicodeUCS2_EncodeLatin1 PyUnicode_EncodeLatin1; 2793 2794 2795 /** 2796 Params: 2797 string = ASCII encoded string 2798 length = size of string 2799 errors = error handling 2800 */ 2801 PyObject* PyUnicodeUCS2_DecodeASCII( 2802 const(char)* string, 2803 Py_ssize_t length, 2804 const(char)* errors); 2805 /// ditto 2806 2807 alias PyUnicodeUCS2_DecodeASCII PyUnicode_DecodeASCII; 2808 2809 /// _ 2810 PyObject* PyUnicodeUCS2_AsASCIIString(PyObject *unicode); 2811 /// ditto 2812 2813 alias PyUnicodeUCS2_AsASCIIString PyUnicode_AsASCIIString; 2814 2815 /** 2816 Params: 2817 data = Unicode char buffer 2818 length = Number of Py_UNICODE
chars to encode 2819 errors = error handling 2820 */ 2821 PyObject* PyUnicodeUCS2_EncodeASCII( 2822 Py_UNICODE* data, 2823 Py_ssize_t length, 2824 const(char)* errors); 2825 /// ditto 2826 2827 alias PyUnicodeUCS2_EncodeASCII PyUnicode_EncodeASCII; 2828 2829 2830 /** 2831 Params: 2832 string = Encoded string 2833 length = size of string 2834 mapping = character mapping (char ordinal -> unicode ordinal) 2835 errors = error handling 2836 */ 2837 PyObject* PyUnicodeUCS2_DecodeCharmap( 2838 const(char)* string, 2839 Py_ssize_t length, 2840 PyObject* mapping, 2841 const(char)* errors 2842 ); 2843 /// ditto 2844 2845 alias PyUnicodeUCS2_DecodeCharmap PyUnicode_DecodeCharmap; 2846 2847 /** 2848 Params: 2849 unicode = Unicode object 2850 mapping = character mapping (unicode ordinal -> char ordinal) 2851 */ 2852 PyObject* PyUnicodeUCS2_AsCharmapString( 2853 PyObject* unicode, 2854 PyObject* mapping); 2855 /// ditto 2856 2857 alias PyUnicodeUCS2_AsCharmapString PyUnicode_AsCharmapString; 2858 2859 /** 2860 Params: 2861 data = Unicode char buffer 2862 length = Number of Py_UNICODE chars to encode 2863 mapping = character mapping (unicode ordinal -> char ordinal) 2864 errors = error handling 2865 */ 2866 PyObject* PyUnicodeUCS2_EncodeCharmap( 2867 Py_UNICODE* data, 2868 Py_ssize_t length, 2869 PyObject* mapping, 2870 const(char)* errors 2871 ); 2872 /// ditto 2873 2874 alias PyUnicodeUCS2_EncodeCharmap PyUnicode_EncodeCharmap; 2875 2876 /** Translate a Py_UNICODE buffer of the given length by applying a 2877 character mapping table to it and return the resulting Unicode 2878 object. 2879 2880 The mapping table must map Unicode ordinal integers to Unicode 2881 ordinal integers or None (causing deletion of the character). 2882 2883 Mapping tables may be dictionaries or sequences. Unmapped character 2884 ordinals (ones which cause a LookupError) are left untouched and 2885 are copied as-is. 
2886 2887 */ 2888 PyObject* PyUnicodeUCS2_TranslateCharmap( 2889 Py_UNICODE* data, 2890 Py_ssize_t length, 2891 PyObject* table, 2892 const(char)* errors 2893 ); 2894 /// ditto 2895 2896 alias PyUnicodeUCS2_TranslateCharmap PyUnicode_TranslateCharmap; 2897 2898 2899 version (Windows) { 2900 /// Availability: Windows only 2901 PyObject* PyUnicodeUCS2_DecodeMBCS( 2902 const(char)* string, 2903 Py_ssize_t length, 2904 const(char)* errors); 2905 /// ditto 2906 2907 alias PyUnicodeUCS2_DecodeMBCS PyUnicode_DecodeMBCS; 2908 2909 /// Availability: Windows only 2910 PyObject* PyUnicodeUCS2_AsMBCSString(PyObject* unicode); 2911 /// ditto 2912 2913 alias PyUnicodeUCS2_AsMBCSString PyUnicode_AsMBCSString; 2914 2915 /// Availability: Windows only 2916 PyObject* PyUnicodeUCS2_EncodeMBCS( 2917 Py_UNICODE* data, 2918 Py_ssize_t length, 2919 const(char)* errors); 2920 /// ditto 2921 2922 alias PyUnicodeUCS2_EncodeMBCS PyUnicode_EncodeMBCS; 2923 2924 } 2925 /** Takes a Unicode string holding a decimal value and writes it into 2926 an output buffer using standard ASCII digit codes. 2927 2928 The output buffer has to provide at least length+1 bytes of storage 2929 area. The output string is 0-terminated. 2930 2931 The encoder converts whitespace to ' ', decimal characters to their 2932 corresponding ASCII digit and all other Latin-1 characters except 2933 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 2934 are treated as errors. This includes embedded NULL bytes. 2935 2936 Error handling is defined by the errors argument: 2937 2938 NULL or "strict": raise a ValueError 2939 "ignore": ignore the wrong characters (these are not copied to the 2940 output buffer) 2941 "replace": replaces illegal characters with '?' 2942 2943 Returns 0 on success, -1 on failure. 
2944 2945 */ 2946 int PyUnicodeUCS2_EncodeDecimal( 2947 Py_UNICODE* s, 2948 Py_ssize_t length, 2949 char* output, 2950 const(char)* errors); 2951 /// ditto 2952 2953 alias PyUnicodeUCS2_EncodeDecimal PyUnicode_EncodeDecimal; 2954 2955 2956 /** Concat two strings giving a new Unicode string. */ 2957 PyObject* PyUnicodeUCS2_Concat( 2958 PyObject* left, 2959 PyObject* right); 2960 /// ditto 2961 2962 alias PyUnicodeUCS2_Concat PyUnicode_Concat; 2963 2964 2965 version(Python_3_0_Or_Later) { 2966 /** Concat two strings and put the result in *pleft 2967 (sets *pleft to NULL on error) 2968 Params: 2969 pleft = Pointer to left string 2970 right = Right string 2971 */ 2972 /// Availability: 3.* 2973 2974 void PyUnicodeUCS2_Append( 2975 PyObject** pleft, 2976 PyObject* right 2977 ); 2978 /// ditto 2979 2980 alias PyUnicodeUCS2_Append PyUnicode_Append; 2981 2982 2983 /** Concat two strings, put the result in *pleft and drop the right object 2984 (sets *pleft to NULL on error) 2985 Params: 2986 pleft = Pointer to left string right = Right string (dropped by this call) 2987 */ 2988 /// Availability: 3.* 2989 void PyUnicodeUCS2_AppendAndDel( 2990 PyObject** pleft, 2991 PyObject* right 2992 ); 2993 /// ditto 2994 2995 alias PyUnicodeUCS2_AppendAndDel PyUnicode_AppendAndDel; 2996 2997 } 2998 2999 /** Split a string giving a list of Unicode strings. 3000 3001 If sep is NULL, splitting will be done at all whitespace 3002 substrings. Otherwise, splits occur at the given separator. 3003 3004 At most maxsplit splits will be done. If negative, no limit is set. 3005 3006 Separators are not included in the resulting list. 3007 3008 */ 3009 PyObject* PyUnicodeUCS2_Split( 3010 PyObject* s, 3011 PyObject* sep, 3012 Py_ssize_t maxsplit); 3013 /// ditto 3014 3015 alias PyUnicodeUCS2_Split PyUnicode_Split; 3016 3017 3018 /** Ditto PyUnicode_Split, but split at line breaks. 3019 3020 CRLF is considered to be one line break. Line breaks are not 3021 included in the resulting list.
*/ 3022 PyObject* PyUnicodeUCS2_Splitlines( 3023 PyObject* s, 3024 int keepends); 3025 /// ditto 3026 3027 alias PyUnicodeUCS2_Splitlines PyUnicode_Splitlines; 3028 3029 3030 version(Python_2_5_Or_Later) { 3031 /** Partition a string using a given separator. */ 3032 /// Availability: >= 2.5 3033 PyObject* PyUnicodeUCS2_Partition( 3034 PyObject* s, 3035 PyObject* sep 3036 ); 3037 /// ditto 3038 3039 alias PyUnicodeUCS2_Partition PyUnicode_Partition; 3040 3041 3042 /** Partition a string using a given separator, searching from the end 3043 of the string. */ 3044 3045 PyObject* PyUnicodeUCS2_RPartition( 3046 PyObject* s, 3047 PyObject* sep 3048 ); 3049 /// ditto 3050 3051 alias PyUnicodeUCS2_RPartition PyUnicode_RPartition; 3052 3053 } 3054 3055 /** Split a string giving a list of Unicode strings. 3056 3057 If sep is NULL, splitting will be done at all whitespace 3058 substrings. Otherwise, splits occur at the given separator. 3059 3060 At most maxsplit splits will be done. But unlike PyUnicode_Split 3061 PyUnicode_RSplit splits from the end of the string. If negative, 3062 no limit is set. 3063 3064 Separators are not included in the resulting list. 3065 3066 */ 3067 PyObject* PyUnicodeUCS2_RSplit( 3068 PyObject* s, 3069 PyObject* sep, 3070 Py_ssize_t maxsplit); 3071 /// ditto 3072 3073 alias PyUnicodeUCS2_RSplit PyUnicode_RSplit; 3074 3075 3076 /** Translate a string by applying a character mapping table to it and 3077 return the resulting Unicode object. 3078 3079 The mapping table must map Unicode ordinal integers to Unicode 3080 ordinal integers or None (causing deletion of the character). 3081 3082 Mapping tables may be dictionaries or sequences. Unmapped character 3083 ordinals (ones which cause a LookupError) are left untouched and 3084 are copied as-is. 
3085 3086 */ 3087 PyObject* PyUnicodeUCS2_Translate( 3088 PyObject* str, 3089 PyObject* table, 3090 const(char)* errors); 3091 /// ditto 3092 3093 alias PyUnicodeUCS2_Translate PyUnicode_Translate; 3094 3095 3096 /** Join a sequence of strings using the given separator and return 3097 the resulting Unicode string. */ 3098 PyObject* PyUnicodeUCS2_Join( 3099 PyObject* separator, 3100 PyObject* seq); 3101 /// ditto 3102 3103 alias PyUnicodeUCS2_Join PyUnicode_Join; 3104 3105 3106 /** Return 1 if substr matches str[start:end] at the given tail end, 0 3107 otherwise. */ 3108 Py_ssize_t PyUnicodeUCS2_Tailmatch( 3109 PyObject* str, 3110 PyObject* substr, 3111 Py_ssize_t start, 3112 Py_ssize_t end, 3113 int direction 3114 ); 3115 /// ditto 3116 3117 alias PyUnicodeUCS2_Tailmatch PyUnicode_Tailmatch; 3118 3119 3120 /** Return the first position of substr in str[start:end] using the 3121 given search direction or -1 if not found. -2 is returned in case 3122 an error occurred and an exception is set. */ 3123 Py_ssize_t PyUnicodeUCS2_Find( 3124 PyObject* str, 3125 PyObject* substr, 3126 Py_ssize_t start, 3127 Py_ssize_t end, 3128 int direction 3129 ); 3130 /// ditto 3131 3132 alias PyUnicodeUCS2_Find PyUnicode_Find; 3133 3134 3135 /** Count the number of occurrences of substr in str[start:end]. */ 3136 Py_ssize_t PyUnicodeUCS2_Count( 3137 PyObject* str, 3138 PyObject* substr, 3139 Py_ssize_t start, 3140 Py_ssize_t end); 3141 /// ditto 3142 3143 alias PyUnicodeUCS2_Count PyUnicode_Count; 3144 3145 3146 /** Replace at most maxcount occurrences of substr in str with replstr 3147 and return the resulting Unicode object. */ 3148 PyObject* PyUnicodeUCS2_Replace( 3149 PyObject* str, 3150 PyObject* substr, 3151 PyObject* replstr, 3152 Py_ssize_t maxcount 3153 ); 3154 /// ditto 3155 3156 alias PyUnicodeUCS2_Replace PyUnicode_Replace; 3157 3158 3159 /** Compare two strings and return -1, 0, 1 for less than, equal, 3160 greater than resp. 
*/ 3161 int PyUnicodeUCS2_Compare(PyObject* left, PyObject* right); 3162 /// ditto 3163 3164 alias PyUnicodeUCS2_Compare PyUnicode_Compare; 3165 3166 version(Python_3_0_Or_Later) { 3167 /** Compare two strings and return -1, 0, 1 for less than, equal, 3168 greater than resp. 3169 Params: 3170 left = Unicode string object 3171 right = ASCII-encoded string 3172 */ 3173 /// Availability: 3.* 3174 int PyUnicodeUCS2_CompareWithASCIIString( 3175 PyObject* left, 3176 const(char)* right 3177 ); 3178 /// ditto 3179 3180 alias PyUnicodeUCS2_CompareWithASCIIString PyUnicode_CompareWithASCIIString; 3181 3182 } 3183 3184 version(Python_2_5_Or_Later) { 3185 /** Rich compare two strings and return one of the following: 3186 3187 - NULL in case an exception was raised 3188 - Py_True or Py_False for successful comparisons 3189 - Py_NotImplemented in case the type combination is unknown 3190 3191 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 3192 case the conversion of the arguments to Unicode fails with a 3193 UnicodeDecodeError. 3194 3195 Possible values for op: 3196 3197 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 3198 3199 */ 3200 /// Availability: >= 2.5 3201 PyObject* PyUnicodeUCS2_RichCompare( 3202 PyObject* left, 3203 PyObject* right, 3204 int op 3205 ); 3206 /// ditto 3207 3208 alias PyUnicodeUCS2_RichCompare PyUnicode_RichCompare; 3209 3210 } 3211 3212 /** Apply an argument tuple or dictionary to a format string and return 3213 the resulting Unicode string. */ 3214 PyObject* PyUnicodeUCS2_Format(PyObject* format, PyObject* args); 3215 /// ditto 3216 3217 alias PyUnicodeUCS2_Format PyUnicode_Format; 3218 3219 3220 /** Checks whether element is contained in container and return 1/0 3221 accordingly. 3222 3223 element has to coerce to a one-element Unicode string. -1 is 3224 returned in case of an error.
*/ 3225 int PyUnicodeUCS2_Contains(PyObject* container, PyObject* element); 3226 /// ditto 3227 3228 alias PyUnicodeUCS2_Contains PyUnicode_Contains; 3229 3230 3231 version(Python_3_0_Or_Later) { 3232 /** Checks whether argument is a valid identifier. */ 3233 /// Availability: 3.* 3234 int PyUnicodeUCS2_IsIdentifier(PyObject* s); 3235 /// ditto 3236 3237 alias PyUnicodeUCS2_IsIdentifier PyUnicode_IsIdentifier; 3238 3239 } 3240 3241 3242 /// _ 3243 int _PyUnicodeUCS2_IsLowercase(Py_UNICODE ch); 3244 /// ditto 3245 3246 alias _PyUnicodeUCS2_IsLowercase _PyUnicode_IsLowercase; 3247 3248 /// _ 3249 int _PyUnicodeUCS2_IsUppercase(Py_UNICODE ch); 3250 /// ditto 3251 3252 alias _PyUnicodeUCS2_IsUppercase _PyUnicode_IsUppercase; 3253 3254 /// _ 3255 int _PyUnicodeUCS2_IsTitlecase(Py_UNICODE ch); 3256 /// ditto 3257 3258 alias _PyUnicodeUCS2_IsTitlecase _PyUnicode_IsTitlecase; 3259 3260 /// _ 3261 int _PyUnicodeUCS2_IsWhitespace(Py_UNICODE ch); 3262 /// ditto 3263 3264 alias _PyUnicodeUCS2_IsWhitespace _PyUnicode_IsWhitespace; 3265 3266 /// _ 3267 int _PyUnicodeUCS2_IsLinebreak(Py_UNICODE ch); 3268 /// ditto 3269 3270 alias _PyUnicodeUCS2_IsLinebreak _PyUnicode_IsLinebreak; 3271 3272 /// _ 3273 Py_UNICODE _PyUnicodeUCS2_ToLowercase(Py_UNICODE ch); 3274 /// ditto 3275 3276 alias _PyUnicodeUCS2_ToLowercase _PyUnicode_ToLowercase; 3277 3278 /// _ 3279 Py_UNICODE _PyUnicodeUCS2_ToUppercase(Py_UNICODE ch); 3280 /// ditto 3281 3282 alias _PyUnicodeUCS2_ToUppercase _PyUnicode_ToUppercase; 3283 3284 /// _ 3285 Py_UNICODE _PyUnicodeUCS2_ToTitlecase(Py_UNICODE ch); 3286 /// ditto 3287 3288 alias _PyUnicodeUCS2_ToTitlecase _PyUnicode_ToTitlecase; 3289 3290 /// _ 3291 int _PyUnicodeUCS2_ToDecimalDigit(Py_UNICODE ch); 3292 /// ditto 3293 3294 alias _PyUnicodeUCS2_ToDecimalDigit _PyUnicode_ToDecimalDigit; 3295 3296 /// _ 3297 int _PyUnicodeUCS2_ToDigit(Py_UNICODE ch); 3298 /// ditto 3299 3300 alias _PyUnicodeUCS2_ToDigit _PyUnicode_ToDigit; 3301 3302 /// _ 3303 double 
_PyUnicodeUCS2_ToNumeric(Py_UNICODE ch); 3304 /// ditto 3305 3306 alias _PyUnicodeUCS2_ToNumeric _PyUnicode_ToNumeric; 3307 3308 /// _ 3309 int _PyUnicodeUCS2_IsDecimalDigit(Py_UNICODE ch); 3310 /// ditto 3311 3312 alias _PyUnicodeUCS2_IsDecimalDigit _PyUnicode_IsDecimalDigit; 3313 3314 /// _ 3315 int _PyUnicodeUCS2_IsDigit(Py_UNICODE ch); 3316 /// ditto 3317 3318 alias _PyUnicodeUCS2_IsDigit _PyUnicode_IsDigit; 3319 3320 /// _ 3321 int _PyUnicodeUCS2_IsNumeric(Py_UNICODE ch); 3322 /// ditto 3323 3324 alias _PyUnicodeUCS2_IsNumeric _PyUnicode_IsNumeric; 3325 3326 /// _ 3327 int _PyUnicodeUCS2_IsAlpha(Py_UNICODE ch); 3328 /// ditto 3329 3330 alias _PyUnicodeUCS2_IsAlpha _PyUnicode_IsAlpha; 3331 3332 }else{ 3333 3334 version(Python_2_6_Or_Later) { 3335 3336 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 3337 size. 3338 3339 u may be NULL which causes the contents to be undefined. It is the 3340 user's responsibility to fill in the needed data afterwards. Note 3341 that modifying the Unicode object contents after construction is 3342 only allowed if u was set to NULL. 3343 3344 The buffer is copied into the new object. 
*/ 3345 /// Availability: >= 2.6 3346 PyObject* PyUnicodeUCS4_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 3347 /// ditto 3348 3349 alias PyUnicodeUCS4_FromUnicode PyUnicode_FromUnicode; 3350 3351 3352 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 3353 /// Availability: >= 2.6 3354 PyObject* PyUnicodeUCS4_FromStringAndSize( 3355 const(char)*u, /* char buffer */ 3356 Py_ssize_t size /* size of buffer */ 3357 ); 3358 /// ditto 3359 3360 alias PyUnicodeUCS4_FromStringAndSize PyUnicode_FromStringAndSize; 3361 3362 3363 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 3364 Latin-1 encoded bytes */ 3365 /// Availability: >= 2.6 3366 PyObject* PyUnicodeUCS4_FromString( 3367 const(char)*u /* string */ 3368 ); 3369 /// ditto 3370 3371 alias PyUnicodeUCS4_FromString PyUnicode_FromString; 3372 3373 /// Availability: >= 2.6 3374 PyObject* PyUnicodeUCS4_FromFormatV(const(char)*, va_list); 3375 /// ditto 3376 3377 alias PyUnicodeUCS4_FromFormatV PyUnicode_FromFormatV; 3378 3379 /// Availability: >= 2.6 3380 PyObject* PyUnicodeUCS4_FromFormat(const(char)*, ...); 3381 /// ditto 3382 3383 alias PyUnicodeUCS4_FromFormat PyUnicode_FromFormat; 3384 3385 3386 /** Format the object based on the format_spec, as defined in PEP 3101 3387 (Advanced String Formatting). 
*/ 3388 /// Availability: >= 2.6 3389 PyObject* _PyUnicodeUCS4_FormatAdvanced(PyObject *obj, 3390 Py_UNICODE *format_spec, 3391 Py_ssize_t format_spec_len); 3392 /// ditto 3393 3394 alias _PyUnicodeUCS4_FormatAdvanced _PyUnicode_FormatAdvanced; 3395 3396 /// Availability: >= 2.6 3397 int PyUnicodeUCS4_ClearFreeList(); 3398 /// ditto 3399 3400 alias PyUnicodeUCS4_ClearFreeList PyUnicode_ClearFreeList; 3401 3402 /** 3403 Params: 3404 string = UTF-7 encoded string 3405 length = size of string 3406 errors = error handling 3407 consumed = bytes consumed 3408 */ 3409 /// Availability: >= 2.6 3410 PyObject* PyUnicodeUCS4_DecodeUTF7Stateful( 3411 const(char)* string, 3412 Py_ssize_t length, 3413 const(char)*errors, 3414 Py_ssize_t *consumed 3415 ); 3416 /// ditto 3417 3418 alias PyUnicodeUCS4_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful; 3419 3420 /** 3421 Params: 3422 string = UTF-32 encoded string 3423 length = size of string 3424 errors = error handling 3425 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3426 */ 3427 /// Availability: >= 2.6 3428 PyObject* PyUnicodeUCS4_DecodeUTF32( 3429 const(char)* string, 3430 Py_ssize_t length, 3431 const(char)*errors, 3432 int *byteorder 3433 ); 3434 /// ditto 3435 3436 alias PyUnicodeUCS4_DecodeUTF32 PyUnicode_DecodeUTF32; 3437 3438 3439 /** 3440 Params: 3441 string = UTF-32 encoded string 3442 length = size of string 3443 errors = error handling 3444 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit consumed = bytes consumed 3445 */ 3446 /// Availability: >= 2.6 3447 PyObject* PyUnicodeUCS4_DecodeUTF32Stateful( 3448 const(char)*string, 3449 Py_ssize_t length, 3450 const(char)*errors, 3451 int *byteorder, 3452 Py_ssize_t *consumed 3453 ); 3454 /// ditto 3455 3456 alias PyUnicodeUCS4_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful; 3457 3458 /** Returns a Python string using the UTF-32 encoding in native byte 3459 order. The string always starts with a BOM mark.
*/
/// Availability: >= 2.6

PyObject* PyUnicodeUCS4_AsUTF32String(
        PyObject *unicode
        );
/// ditto

alias PyUnicodeUCS4_AsUTF32String PyUnicode_AsUTF32String;


/** Returns a Python string object holding the UTF-32 encoded value of
   the Unicode data.

   The output is written according to the requested byte order:

   byteorder == -1: little endian
   byteorder == 0:  native byte order (writes a BOM mark)
   byteorder == 1:  big endian

   If byteorder is 0, the output string will always start with the
   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   prepended.
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
errors = error handling
byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE

*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS4_EncodeUTF32(
        const Py_UNICODE *data,
        Py_ssize_t length,
        const(char)* errors,
        int byteorder
        );
/// ditto

alias PyUnicodeUCS4_EncodeUTF32 PyUnicode_EncodeUTF32;

}

/** Return a read-only pointer to the Unicode object's internal
  Py_UNICODE buffer. */
// NOTE(review): the buffer is documented as read-only, but the declared
// return type is a mutable Py_UNICODE*; callers should not write through it.
Py_UNICODE* PyUnicodeUCS4_AsUnicode(PyObject* unicode);
/// ditto

alias PyUnicodeUCS4_AsUnicode PyUnicode_AsUnicode;

/** Get the length of the Unicode object. */
Py_ssize_t PyUnicodeUCS4_GetSize(PyObject* unicode);
/// ditto

alias PyUnicodeUCS4_GetSize PyUnicode_GetSize;


/** Get the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicodeUCS4_GetMax();
/// ditto

alias PyUnicodeUCS4_GetMax PyUnicode_GetMax;


/** Resize an already allocated Unicode object to the new size length.

   _*unicode is modified to point to the new (resized) object and 0
   returned on success.

   This API may only be called by the function which also called the
   Unicode constructor. The refcount on the object must be 1. Otherwise,
   an error is returned.

   Error handling is implemented as follows: an exception is set, -1
   is returned and *unicode left untouched.
Params:
unicode = pointer to the new unicode object.
length = New length.

*/
int PyUnicodeUCS4_Resize(PyObject** unicode, Py_ssize_t length);
/// ditto

alias PyUnicodeUCS4_Resize PyUnicode_Resize;

/** Coerce obj to a Unicode object and return a reference with
   _*incremented* refcount.

   Coercion is done in the following way:

   1. String and other char buffer compatible objects are decoded
      under the assumptions that they contain data using the current
      default encoding. Decoding is done in "strict" mode.

   2. All other objects (including Unicode objects) raise an
      exception.

   The API returns NULL in case of an error. The caller is responsible
   for decref'ing the returned objects.

Params:
obj = object to coerce
encoding = encoding
errors = error handling

*/
// NOTE(review): the description above speaks of the default encoding and
// "strict" mode although the signature takes explicit encoding/errors
// arguments; presumably those defaults apply when null is passed -- confirm.
PyObject* PyUnicodeUCS4_FromEncodedObject(
        PyObject* obj,
        const(char)* encoding,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_FromEncodedObject PyUnicode_FromEncodedObject;


/** Coerce obj to a Unicode object and return a reference with
   _*incremented* refcount.

   Unicode objects are passed back as-is (subclasses are converted to
   true Unicode objects), all other objects are delegated to
   PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
   using the default encoding as basis for decoding the object.

   The API returns NULL in case of an error. The caller is responsible
   for decref'ing the returned objects.

*/
PyObject* PyUnicodeUCS4_FromObject(PyObject* obj);
/// ditto

alias PyUnicodeUCS4_FromObject PyUnicode_FromObject;


/** Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object. */
// NOTE(review): the comments in this group speak of C's wchar_t, while the
// prototypes use D's wchar (a 16-bit code unit); confirm that the two agree
// in width on every supported platform.
PyObject* PyUnicodeUCS4_FromWideChar(const(wchar)* w, Py_ssize_t size);
/// ditto

alias PyUnicodeUCS4_FromWideChar PyUnicode_FromWideChar;


/** Copies the Unicode Object contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Note that the resulting wchar_t string may or may not be
   0-terminated.  It is the responsibility of the caller to make sure
   that the wchar_t string is 0-terminated in case this is required by
   the application.

   Returns the number of wchar_t characters copied (excluding a
   possibly trailing 0-termination character) or -1 in case of an
   error. */
// NOTE(review): w is the destination buffer according to the description
// above, yet it is declared const(wchar)*; confirm against the C prototype
// before relying on the const qualifier.
Py_ssize_t PyUnicodeUCS4_AsWideChar(
        PyUnicodeObject* unicode,
        const(wchar)* w,
        Py_ssize_t size);
/// ditto

alias PyUnicodeUCS4_AsWideChar PyUnicode_AsWideChar;


/** Create a Unicode Object from the given Unicode code point ordinal.

   The ordinal must be in range(0x10000) on narrow Python builds
   (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
   raised in case it is not.

*/
PyObject* PyUnicodeUCS4_FromOrdinal(int ordinal);
/// ditto

alias PyUnicodeUCS4_FromOrdinal PyUnicode_FromOrdinal;


/** Return a Python string holding the default encoded value of the
   Unicode object.

   The resulting string is cached in the Unicode object for subsequent
   usage by this function. The cached version is needed to implement
   the character buffer interface and will live (at least) as long as
   the Unicode object itself.

   The refcount of the string is *not* incremented.

   _*** Exported for internal use by the interpreter only !!! ***

*/
// NOTE(review): both parameters are unnamed here; the first is presumably
// the Unicode object and the second the error-handling mode -- confirm
// against the C header.
PyObject* _PyUnicodeUCS4_AsDefaultEncodedString(PyObject *, const(char)*);
/// ditto

alias _PyUnicodeUCS4_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString;


/** Returns the currently active default encoding.

   The default encoding is currently implemented as a run-time settable
   process global.  This may change in future versions of the
   interpreter to become a parameter which is managed on a per-thread
   basis.

*/
const(char)* PyUnicodeUCS4_GetDefaultEncoding();
/// ditto

alias PyUnicodeUCS4_GetDefaultEncoding PyUnicode_GetDefaultEncoding;


/** Sets the currently active default encoding.

   Returns 0 on success, -1 in case of an error.

Params:
encoding = name of the encoding to make the default

*/
int PyUnicodeUCS4_SetDefaultEncoding(const(char)*encoding);
/// ditto

alias PyUnicodeUCS4_SetDefaultEncoding PyUnicode_SetDefaultEncoding;


/** Create a Unicode object by decoding the encoded string s of the
   given size.
Params:
s = encoded string
size = size of buffer
encoding = encoding
errors = error handling
*/
PyObject* PyUnicodeUCS4_Decode(
        const(char)* s,
        Py_ssize_t size,
        const(char)* encoding,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_Decode PyUnicode_Decode;


version(Python_3_0_Or_Later) {
    /** Decode a Unicode object unicode and return the result as a Python
       object. */
    /// Availability: 3.*

    PyObject* PyUnicodeUCS4_AsDecodedObject(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    /// ditto

    alias PyUnicodeUCS4_AsDecodedObject PyUnicode_AsDecodedObject;

    /** Decode a Unicode object unicode and return the result as Unicode
       object.
*/
    /// Availability: 3.*

    PyObject* PyUnicodeUCS4_AsDecodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    /// ditto

    alias PyUnicodeUCS4_AsDecodedUnicode PyUnicode_AsDecodedUnicode;

}

/** Encodes a Py_UNICODE buffer of the given size and returns a
   Python string object.
Params:
s = Unicode char buffer
size = number of Py_UNICODE chars to encode
encoding = encoding
errors = error handling
*/
PyObject* PyUnicodeUCS4_Encode(
        Py_UNICODE* s,
        Py_ssize_t size,
        const(char)* encoding,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_Encode PyUnicode_Encode;


/** Encodes a Unicode object and returns the result as a Python object.
Params:
unicode = Unicode object
encoding = encoding
errors = error handling
*/
PyObject* PyUnicodeUCS4_AsEncodedObject(
        PyObject* unicode,
        const(char)* encoding,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_AsEncodedObject PyUnicode_AsEncodedObject;


/** Encodes a Unicode object and returns the result as a Python string
   object.
Params:
unicode = Unicode object
encoding = encoding
errors = error handling
*/
PyObject* PyUnicodeUCS4_AsEncodedString(
        PyObject* unicode,
        const(char)* encoding,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_AsEncodedString PyUnicode_AsEncodedString;


version(Python_3_0_Or_Later) {
    /** Encodes a Unicode object and returns the result as Unicode
       object.
*/
    /// Availability: 3.*
    PyObject* PyUnicodeUCS4_AsEncodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    /// ditto

    alias PyUnicodeUCS4_AsEncodedUnicode PyUnicode_AsEncodedUnicode;

}

/** Decodes a UTF-7 encoded string.
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeUTF7(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeUTF7 PyUnicode_DecodeUTF7;


/** Encodes a Py_UNICODE buffer as UTF-7.
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
encodeSetO = Encode RFC2152 Set O characters in base64
encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
errors = error handling
*/
PyObject* PyUnicodeUCS4_EncodeUTF7(
        Py_UNICODE* data,
        Py_ssize_t length,
        int encodeSetO,
        int encodeWhiteSpace,
        const(char)* errors
        );
/// ditto

alias PyUnicodeUCS4_EncodeUTF7 PyUnicode_EncodeUTF7;


// The UTF-8 codec entry points below carry only placeholder documentation;
// their parameter lists follow the same shape as the UTF-7 ones above.
/// _
PyObject* PyUnicodeUCS4_DecodeUTF8(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeUTF8 PyUnicode_DecodeUTF8;

/// _
PyObject* PyUnicodeUCS4_DecodeUTF8Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        Py_ssize_t* consumed
        );
/// ditto

alias PyUnicodeUCS4_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful;

/// _
PyObject* PyUnicodeUCS4_AsUTF8String(PyObject* unicode);
/// ditto

alias PyUnicodeUCS4_AsUTF8String PyUnicode_AsUTF8String;

/// _
PyObject* PyUnicodeUCS4_EncodeUTF8(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char) *errors);
/// ditto

alias PyUnicodeUCS4_EncodeUTF8 PyUnicode_EncodeUTF8;


/**
Decodes length bytes from a UTF-16 encoded buffer string and returns
   the corresponding Unicode object.

   errors (if non-NULL) defines the error handling. It defaults
   to "strict".

   If byteorder is non-NULL, the decoder starts decoding using the
   given byte order:

   *byteorder == -1: little endian
   *byteorder == 0:  native order
   *byteorder == 1:  big endian

   In native mode, the first two bytes of the stream are checked for a
   BOM mark. If found, the BOM mark is analysed, the byte order
   adjusted and the BOM skipped.  In the other modes, no BOM mark
   interpretation is done. After completion, *byteorder is set to the
   current byte order at the end of input data.

   If byteorder is NULL, the codec starts in native order mode.

Params:
string = UTF-16 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit

*/
PyObject* PyUnicodeUCS4_DecodeUTF16(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder);
/// ditto

alias PyUnicodeUCS4_DecodeUTF16 PyUnicode_DecodeUTF16;

/**
Params:
string = UTF-16 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
PyObject* PyUnicodeUCS4_DecodeUTF16Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder,
        Py_ssize_t* consumed
        );
/// ditto

alias PyUnicodeUCS4_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful;

/** Returns a Python string using the UTF-16 encoding in native byte
   order. The string always starts with a BOM mark.  */
PyObject* PyUnicodeUCS4_AsUTF16String(PyObject *unicode);
/// ditto

alias PyUnicodeUCS4_AsUTF16String PyUnicode_AsUTF16String;

/** Returns a Python string object holding the UTF-16 encoded value of
   the Unicode data.

   The output is written according to the requested byte order:

   byteorder == -1: little endian
   byteorder == 0:  native byte order (writes a BOM mark)
   byteorder == 1:  big endian

   If byteorder is 0, the output string will always start with the
   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   prepended.

   Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
   UCS-2. This trick makes it possible to add full UTF-16 capabilities
   at a later point without compromising the APIs.

Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
errors = error handling
byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE

*/
PyObject* PyUnicodeUCS4_EncodeUTF16(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors,
        int byteorder
        );
/// ditto

alias PyUnicodeUCS4_EncodeUTF16 PyUnicode_EncodeUTF16;


/// _
PyObject* PyUnicodeUCS4_DecodeUnicodeEscape(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape;

/// _
PyObject* PyUnicodeUCS4_AsUnicodeEscapeString(
        PyObject* unicode);
/// ditto

alias PyUnicodeUCS4_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString;

/// _
PyObject* PyUnicodeUCS4_EncodeUnicodeEscape(
        Py_UNICODE* data,
        Py_ssize_t length);
/// ditto

alias PyUnicodeUCS4_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape;

/**
Params:
string = Raw-Unicode-Escape encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeRawUnicodeEscape(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape;

/// _
PyObject* PyUnicodeUCS4_AsRawUnicodeEscapeString(PyObject* unicode);
/// ditto

alias PyUnicodeUCS4_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString;

/// _
PyObject* PyUnicodeUCS4_EncodeRawUnicodeEscape(
        Py_UNICODE* data, Py_ssize_t length);
/// ditto

alias PyUnicodeUCS4_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape;


/// _
PyObject* _PyUnicodeUCS4_DecodeUnicodeInternal(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias _PyUnicodeUCS4_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal;


/**
Params:
string = Latin-1 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeLatin1(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeLatin1 PyUnicode_DecodeLatin1;

/// _
PyObject* PyUnicodeUCS4_AsLatin1String(PyObject *unicode);
/// ditto

alias PyUnicodeUCS4_AsLatin1String PyUnicode_AsLatin1String;

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
errors = error handling
*/
PyObject* PyUnicodeUCS4_EncodeLatin1(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_EncodeLatin1 PyUnicode_EncodeLatin1;


/**
Params:
string = ASCII encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeASCII(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_DecodeASCII PyUnicode_DecodeASCII;

/// _
PyObject* PyUnicodeUCS4_AsASCIIString(PyObject *unicode);
/// ditto

alias PyUnicodeUCS4_AsASCIIString PyUnicode_AsASCIIString;

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE
chars to encode
errors = error handling
*/
PyObject* PyUnicodeUCS4_EncodeASCII(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_EncodeASCII PyUnicode_EncodeASCII;


/** Decodes an encoded string through a character mapping.
Params:
string = Encoded string
length = size of string
mapping = character mapping (char ordinal -> unicode ordinal)
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeCharmap(
        const(char)* string,
        Py_ssize_t length,
        PyObject* mapping,
        const(char)* errors
        );
/// ditto

alias PyUnicodeUCS4_DecodeCharmap PyUnicode_DecodeCharmap;

/** Encodes a Unicode object through a character mapping.
Params:
unicode = Unicode object
mapping = character mapping (unicode ordinal -> char ordinal)
*/
PyObject* PyUnicodeUCS4_AsCharmapString(
        PyObject* unicode,
        PyObject* mapping);
/// ditto

alias PyUnicodeUCS4_AsCharmapString PyUnicode_AsCharmapString;

/** Encodes a Py_UNICODE buffer through a character mapping.
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
mapping = character mapping (unicode ordinal -> char ordinal)
errors = error handling
*/
PyObject* PyUnicodeUCS4_EncodeCharmap(
        Py_UNICODE* data,
        Py_ssize_t length,
        PyObject* mapping,
        const(char)* errors
        );
/// ditto

alias PyUnicodeUCS4_EncodeCharmap PyUnicode_EncodeCharmap;

/** Translate a Py_UNICODE buffer of the given length by applying a
   character mapping table to it and return the resulting Unicode
   object.

   The mapping table must map Unicode ordinal integers to Unicode
   ordinal integers or None (causing deletion of the character).

   Mapping tables may be dictionaries or sequences. Unmapped character
   ordinals (ones which cause a LookupError) are left untouched and
   are copied as-is.

Params:
data = Py_UNICODE buffer to translate
length = length of the buffer
table = character mapping table
errors = error handling

*/
PyObject* PyUnicodeUCS4_TranslateCharmap(
        Py_UNICODE* data,
        Py_ssize_t length,
        PyObject* table,
        const(char)* errors
        );
/// ditto

alias PyUnicodeUCS4_TranslateCharmap PyUnicode_TranslateCharmap;


// MBCS codec entry points; declared only when compiling for Windows.
version (Windows) {
    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_DecodeMBCS(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);
    /// ditto

    alias PyUnicodeUCS4_DecodeMBCS PyUnicode_DecodeMBCS;

    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_AsMBCSString(PyObject* unicode);
    /// ditto

    alias PyUnicodeUCS4_AsMBCSString PyUnicode_AsMBCSString;

    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_EncodeMBCS(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);
    /// ditto

    alias PyUnicodeUCS4_EncodeMBCS PyUnicode_EncodeMBCS;

}
/** Takes a Unicode string holding a decimal value and writes it into
   an output buffer using standard ASCII digit codes.

   The output buffer has to provide at least length+1 bytes of storage
   area. The output string is 0-terminated.

   The encoder converts whitespace to ' ', decimal characters to their
   corresponding ASCII digit and all other Latin-1 characters except
   \0 as-is. Characters outside this range (Unicode ordinals 1-256)
   are treated as errors. This includes embedded NULL bytes.

   Error handling is defined by the errors argument:

   NULL or "strict": raise a ValueError
   "ignore": ignore the wrong characters (these are not copied to the
   output buffer)
   "replace": replaces illegal characters with '?'

   Returns 0 on success, -1 on failure.

*/
int PyUnicodeUCS4_EncodeDecimal(
        Py_UNICODE* s,
        Py_ssize_t length,
        char* output,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_EncodeDecimal PyUnicode_EncodeDecimal;


/** Concat two strings giving a new Unicode string. */
PyObject* PyUnicodeUCS4_Concat(
        PyObject* left,
        PyObject* right);
/// ditto

alias PyUnicodeUCS4_Concat PyUnicode_Concat;


version(Python_3_0_Or_Later) {
    /** Concat two strings and put the result in *pleft
       (sets *pleft to NULL on error)
Params:
pleft = Pointer to left string
right = Right string
     */
    /// Availability: 3.*

    void PyUnicodeUCS4_Append(
            PyObject** pleft,
            PyObject* right
            );
    /// ditto

    alias PyUnicodeUCS4_Append PyUnicode_Append;


    /** Concat two strings, put the result in *pleft and drop the right object
       (sets *pleft to NULL on error)
Params:
pleft = Pointer to left string
right = Right string (dropped by this call)
     */
    /// Availability: 3.*
    void PyUnicodeUCS4_AppendAndDel(
            PyObject** pleft,
            PyObject* right
            );
    /// ditto

    alias PyUnicodeUCS4_AppendAndDel PyUnicode_AppendAndDel;

}

/** Split a string giving a list of Unicode strings.

   If sep is NULL, splitting will be done at all whitespace
   substrings. Otherwise, splits occur at the given separator.

   At most maxsplit splits will be done. If negative, no limit is set.

   Separators are not included in the resulting list.

*/
PyObject* PyUnicodeUCS4_Split(
        PyObject* s,
        PyObject* sep,
        Py_ssize_t maxsplit);
/// ditto

alias PyUnicodeUCS4_Split PyUnicode_Split;


/** Ditto PyUnicode_Split, but split at line breaks.

   CRLF is considered to be one line break. Line breaks are not
   included in the resulting list.
*/
// NOTE(review): the description says line breaks are never included, yet the
// prototype takes a keepends flag; presumably a nonzero value keeps them --
// confirm against the CPython C-API reference.
PyObject* PyUnicodeUCS4_Splitlines(
        PyObject* s,
        int keepends);
/// ditto

alias PyUnicodeUCS4_Splitlines PyUnicode_Splitlines;


version(Python_2_5_Or_Later) {
    /** Partition a string using a given separator. */
    /// Availability: >= 2.5
    PyObject* PyUnicodeUCS4_Partition(
            PyObject* s,
            PyObject* sep
            );
    /// ditto

    alias PyUnicodeUCS4_Partition PyUnicode_Partition;


    /** Partition a string using a given separator, searching from the end
      of the string. */
    /// Availability: >= 2.5

    PyObject* PyUnicodeUCS4_RPartition(
            PyObject* s,
            PyObject* sep
            );
    /// ditto

    alias PyUnicodeUCS4_RPartition PyUnicode_RPartition;

}

/** Split a string giving a list of Unicode strings.

   If sep is NULL, splitting will be done at all whitespace
   substrings. Otherwise, splits occur at the given separator.

   At most maxsplit splits will be done. But unlike PyUnicode_Split
   PyUnicode_RSplit splits from the end of the string. If negative,
   no limit is set.

   Separators are not included in the resulting list.

*/
PyObject* PyUnicodeUCS4_RSplit(
        PyObject* s,
        PyObject* sep,
        Py_ssize_t maxsplit);
/// ditto

alias PyUnicodeUCS4_RSplit PyUnicode_RSplit;


/** Translate a string by applying a character mapping table to it and
   return the resulting Unicode object.

   The mapping table must map Unicode ordinal integers to Unicode
   ordinal integers or None (causing deletion of the character).

   Mapping tables may be dictionaries or sequences. Unmapped character
   ordinals (ones which cause a LookupError) are left untouched and
   are copied as-is.

*/
PyObject* PyUnicodeUCS4_Translate(
        PyObject* str,
        PyObject* table,
        const(char)* errors);
/// ditto

alias PyUnicodeUCS4_Translate PyUnicode_Translate;


/** Join a sequence of strings using the given separator and return
   the resulting Unicode string. */
PyObject* PyUnicodeUCS4_Join(
        PyObject* separator,
        PyObject* seq);
/// ditto

alias PyUnicodeUCS4_Join PyUnicode_Join;


/** Return 1 if substr matches str[start:end] at the given tail end, 0
   otherwise. */
// NOTE(review): direction selects the tail end; CPython's C-API reference
// gives -1 for a prefix match and 1 for a suffix match -- confirm.
Py_ssize_t PyUnicodeUCS4_Tailmatch(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction
        );
/// ditto

alias PyUnicodeUCS4_Tailmatch PyUnicode_Tailmatch;


/** Return the first position of substr in str[start:end] using the
   given search direction or -1 if not found. -2 is returned in case
   an error occurred and an exception is set. */
// NOTE(review): CPython's C-API reference gives direction == 1 for a forward
// search and direction == -1 for a backward search -- confirm.
Py_ssize_t PyUnicodeUCS4_Find(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction
        );
/// ditto

alias PyUnicodeUCS4_Find PyUnicode_Find;


/** Count the number of occurrences of substr in str[start:end].  */
Py_ssize_t PyUnicodeUCS4_Count(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end);
/// ditto

alias PyUnicodeUCS4_Count PyUnicode_Count;


/** Replace at most maxcount occurrences of substr in str with replstr
   and return the resulting Unicode object. */
// NOTE(review): CPython's C-API reference states that maxcount == -1 replaces
// all occurrences -- confirm.
PyObject* PyUnicodeUCS4_Replace(
        PyObject* str,
        PyObject* substr,
        PyObject* replstr,
        Py_ssize_t maxcount
        );
/// ditto

alias PyUnicodeUCS4_Replace PyUnicode_Replace;


/** Compare two strings and return -1, 0, 1 for less than, equal,
   greater than resp.
*/
int PyUnicodeUCS4_Compare(PyObject* left, PyObject* right);
/// ditto

alias PyUnicodeUCS4_Compare PyUnicode_Compare;

version(Python_3_0_Or_Later) {
    /** Compare two strings and return -1, 0, 1 for less than, equal,
      greater than resp.
Params:
left = Unicode object
right = ASCII-encoded string
     */
    /// Availability: 3.*
    int PyUnicodeUCS4_CompareWithASCIIString(
            PyObject* left,
            const(char)* right
            );
    /// ditto

    alias PyUnicodeUCS4_CompareWithASCIIString PyUnicode_CompareWithASCIIString;

}

version(Python_2_5_Or_Later) {
    /** Rich compare two strings and return one of the following:

      - NULL in case an exception was raised
      - Py_True or Py_False for successful comparisons
      - Py_NotImplemented in case the type combination is unknown

      Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
      case the conversion of the arguments to Unicode fails with a
      UnicodeDecodeError.

      Possible values for op:

      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE

     */
    /// Availability: >= 2.5
    PyObject* PyUnicodeUCS4_RichCompare(
            PyObject* left,
            PyObject* right,
            int op
            );
    /// ditto

    alias PyUnicodeUCS4_RichCompare PyUnicode_RichCompare;

}

/** Apply an argument tuple or dictionary to a format string and return
   the resulting Unicode string. */
PyObject* PyUnicodeUCS4_Format(PyObject* format, PyObject* args);
/// ditto

alias PyUnicodeUCS4_Format PyUnicode_Format;


/** Checks whether element is contained in container and return 1/0
   accordingly.

   element has to coerce to a one-element Unicode string. -1 is
   returned in case of an error.
*/
int PyUnicodeUCS4_Contains(PyObject* container, PyObject* element);
/// ditto

alias PyUnicodeUCS4_Contains PyUnicode_Contains;


version(Python_3_0_Or_Later) {
    /** Checks whether argument is a valid identifier. */
    /// Availability: 3.*
    int PyUnicodeUCS4_IsIdentifier(PyObject* s);
    /// ditto

    alias PyUnicodeUCS4_IsIdentifier PyUnicode_IsIdentifier;

}


// Character classification and conversion helpers. Each takes one Py_UNICODE
// value; the Is* forms return int, and the To* forms return Py_UNICODE, int
// or double exactly as declared. Only placeholder Ddoc exists for them, so
// the precise result conventions are presumably those of CPython's Unicode
// character database -- confirm before relying on specific return values.
/// _
int _PyUnicodeUCS4_IsLowercase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsLowercase _PyUnicode_IsLowercase;

/// _
int _PyUnicodeUCS4_IsUppercase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsUppercase _PyUnicode_IsUppercase;

/// _
int _PyUnicodeUCS4_IsTitlecase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsTitlecase _PyUnicode_IsTitlecase;

/// _
int _PyUnicodeUCS4_IsWhitespace(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsWhitespace _PyUnicode_IsWhitespace;

/// _
int _PyUnicodeUCS4_IsLinebreak(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsLinebreak _PyUnicode_IsLinebreak;

/// _
Py_UNICODE _PyUnicodeUCS4_ToLowercase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToLowercase _PyUnicode_ToLowercase;

/// _
Py_UNICODE _PyUnicodeUCS4_ToUppercase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToUppercase _PyUnicode_ToUppercase;

/// _
Py_UNICODE _PyUnicodeUCS4_ToTitlecase(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToTitlecase _PyUnicode_ToTitlecase;

/// _
int _PyUnicodeUCS4_ToDecimalDigit(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToDecimalDigit _PyUnicode_ToDecimalDigit;

/// _
int _PyUnicodeUCS4_ToDigit(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToDigit _PyUnicode_ToDigit;

/// _
double _PyUnicodeUCS4_ToNumeric(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_ToNumeric _PyUnicode_ToNumeric;

/// _
int _PyUnicodeUCS4_IsDecimalDigit(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsDecimalDigit _PyUnicode_IsDecimalDigit;

/// _
int _PyUnicodeUCS4_IsDigit(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsDigit _PyUnicode_IsDigit;

/// _
int _PyUnicodeUCS4_IsNumeric(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsNumeric _PyUnicode_IsNumeric;

/// _
int _PyUnicodeUCS4_IsAlpha(Py_UNICODE ch);
/// ditto

alias _PyUnicodeUCS4_IsAlpha _PyUnicode_IsAlpha;

}
version(Python_3_0_Or_Later) {
    // Py_UNICODE string helpers named after the C <string.h> routines
    // (strlen, strcpy, strcat, ...); presumably they behave like their
    // namesakes on null-terminated Py_UNICODE buffers -- confirm. These are
    // declared directly, without a UCS4-mangled twin.
    /// Availability: 3.*
    size_t Py_UNICODE_strlen(const(Py_UNICODE)* u);

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strcpy(Py_UNICODE* s1, const(Py_UNICODE)* s2);

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strcat(Py_UNICODE* s1, const(Py_UNICODE)* s2);
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strncpy(
            Py_UNICODE* s1,
            const(Py_UNICODE)* s2,
            size_t n);

    /// Availability: 3.*
    int Py_UNICODE_strcmp(
            const(Py_UNICODE)* s1,
            const(Py_UNICODE)* s2
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        int Py_UNICODE_strncmp(
                const(Py_UNICODE)* s1,
                const(Py_UNICODE)* s2,
                size_t n
                );
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strchr(
            const(Py_UNICODE)* s,
            Py_UNICODE c
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strrchr(
                const(Py_UNICODE)* s,
                Py_UNICODE c
                );
    }

    version(Python_3_5_Or_Later) {
        // All four parameters are unnamed in this prototype; see the CPython
        // sources for the meaning of the three int arguments.
        /// Availability: >= 3.5
        PyObject* _PyUnicode_FormatLong(PyObject*, int, int, int);
    }

    version(Python_3_2_Or_Later) {
4623 /** Create a copy of a unicode string ending with a nul character. Return NULL 4624 and raise a MemoryError exception on memory allocation failure, otherwise 4625 return a new allocated buffer (use PyMem_Free() to free the buffer). */ 4626 /// Availability: >= 3.2 4627 4628 Py_UNICODE* PyUnicode_AsUnicodeCopy( 4629 PyObject* unicode 4630 ); 4631 } 4632 } 4633 4634 4635 /// _ 4636 int _PyUnicode_IsTitlecase( 4637 Py_UCS4 ch /* Unicode character */ 4638 ); 4639 4640 /// _ 4641 int _PyUnicode_IsXidStart( 4642 Py_UCS4 ch /* Unicode character */ 4643 ); 4644 /** Externally visible for str.strip(unicode) */ 4645 PyObject* _PyUnicode_XStrip(PyUnicodeObject* self, int striptype, 4646 PyObject *sepobj 4647 ); 4648 version(Python_3_0_Or_Later) { 4649 version(Python_3_2_Or_Later) { 4650 /** Using the current locale, insert the thousands grouping 4651 into the string pointed to by buffer. For the argument descriptions, 4652 see Objects/stringlib/localeutil.h */ 4653 /// Availability: >= 3.2 4654 Py_ssize_t _PyUnicode_InsertThousandsGroupingLocale( 4655 Py_UNICODE* buffer, 4656 Py_ssize_t n_buffer, 4657 Py_UNICODE* digits, 4658 Py_ssize_t n_digits, 4659 Py_ssize_t min_width); 4660 } 4661 4662 /** Using explicit passed-in values, insert the thousands grouping 4663 into the string pointed to by buffer. 
For the argument descriptions, 4664 see Objects/stringlib/localeutil.h */ 4665 /// Availability: 3.* 4666 Py_ssize_t _PyUnicode_InsertThousandsGrouping( 4667 Py_UNICODE* buffer, 4668 Py_ssize_t n_buffer, 4669 Py_UNICODE* digits, 4670 Py_ssize_t n_digits, 4671 Py_ssize_t min_width, 4672 const(char)* grouping, 4673 const(char)* thousands_sep); 4674 } 4675 4676 version(Python_3_2_Or_Later) { 4677 /// Availability: >= 3.2 4678 PyObject* PyUnicode_TransformDecimalToASCII( 4679 Py_UNICODE *s, /* Unicode buffer */ 4680 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 4681 ); 4682 /* --- File system encoding ---------------------------------------------- */ 4683 4684 /** ParseTuple converter: encode str objects to bytes using 4685 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 4686 /// Availability: >= 3.2 4687 int PyUnicode_FSConverter(PyObject*, void*); 4688 4689 /** ParseTuple converter: decode bytes objects to unicode using 4690 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 4691 /// Availability: >= 3.2 4692 int PyUnicode_FSDecoder(PyObject*, void*); 4693 4694 /** Decode a null-terminated string using Py_FileSystemDefaultEncoding 4695 and the "surrogateescape" error handler. 4696 4697 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4698 encoding. 4699 4700 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 4701 */ 4702 /// Availability: >= 3.2 4703 PyObject* PyUnicode_DecodeFSDefault( 4704 const(char)* s /* encoded string */ 4705 ); 4706 4707 /** Decode a string using Py_FileSystemDefaultEncoding 4708 and the "surrogateescape" error handler. 4709 4710 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4711 encoding. 
4712 */ 4713 /// Availability: >= 3.2 4714 PyObject* PyUnicode_DecodeFSDefaultAndSize( 4715 const(char)* s, /* encoded string */ 4716 Py_ssize_t size /* size */ 4717 ); 4718 4719 /** Encode a Unicode object to Py_FileSystemDefaultEncoding with the 4720 "surrogateescape" error handler, and return bytes. 4721 4722 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4723 encoding. 4724 */ 4725 /// Availability: >= 3.2 4726 PyObject* PyUnicode_EncodeFSDefault( 4727 PyObject* unicode 4728 ); 4729 } 4730 4731 /* 4732 alias _PyUnicode_IsWhitespace Py_UNICODE_ISSPACE; 4733 alias _PyUnicode_IsLowercase Py_UNICODE_ISLOWER; 4734 alias _PyUnicode_IsUppercase Py_UNICODE_ISUPPER; 4735 alias _PyUnicode_IsTitlecase Py_UNICODE_ISTITLE; 4736 alias _PyUnicode_IsLinebreak Py_UNICODE_ISLINEBREAK; 4737 alias _PyUnicode_ToLowercase Py_UNICODE_TOLOWER; 4738 alias _PyUnicode_ToUppercase Py_UNICODE_TOUPPER; 4739 alias _PyUnicode_ToTitlecase Py_UNICODE_TOTITLE; 4740 alias _PyUnicode_IsDecimalDigit Py_UNICODE_ISDECIMAL; 4741 alias _PyUnicode_IsDigit Py_UNICODE_ISDIGIT; 4742 alias _PyUnicode_IsNumeric Py_UNICODE_ISNUMERIC; 4743 alias _PyUnicode_ToDecimalDigit Py_UNICODE_TODECIMAL; 4744 alias _PyUnicode_ToDigit Py_UNICODE_TODIGIT; 4745 alias _PyUnicode_ToNumeric Py_UNICODE_TONUMERIC; 4746 alias _PyUnicode_IsAlpha Py_UNICODE_ISALPHA; 4747 */ 4748 4749 /// _ 4750 int Py_UNICODE_ISALNUM()(Py_UNICODE ch) { 4751 return ( 4752 Py_UNICODE_ISALPHA(ch) 4753 || Py_UNICODE_ISDECIMAL(ch) 4754 || Py_UNICODE_ISDIGIT(ch) 4755 || Py_UNICODE_ISNUMERIC(ch) 4756 ); 4757 } 4758 4759 /// _ 4760 void Py_UNICODE_COPY()(void* target, void* source, size_t length) { 4761 memcpy(target, source, cast(uint)(length* Py_UNICODE.sizeof)); 4762 } 4763 4764 /// _ 4765 void Py_UNICODE_FILL()(Py_UNICODE* target, Py_UNICODE value, size_t length) { 4766 for (size_t i = 0; i < length; i++) { 4767 target[i] = value; 4768 } 4769 } 4770 4771 /// _ 4772 int Py_UNICODE_MATCH()(PyUnicodeObject* string, size_t offset, 4773 
PyUnicodeObject* substring 4774 ) 4775 { 4776 return ( 4777 (*(string.str + offset) == *(substring.str)) 4778 && !memcmp(string.str + offset, substring.str, 4779 substring.length * Py_UNICODE.sizeof 4780 ) 4781 ); 4782 } 4783 4784