/**
  Mirror _unicodeobject.h

  Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
  produce different external names and thus cause import errors in
  case Python interpreters and extensions with mixed compiled in
  Unicode width assumptions are combined.
  */
module deimos.python.unicodeobject;

import core.stdc.stdarg;
import core.stdc.string;
import core.stdc.stddef : wchar_t;

import deimos.python.pyport;
import deimos.python.object;

extern(C):
// Python-header-file: Include/unicodeobject.h:

/** Py_UNICODE is the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode
   type. */
version (Python_Unicode_UCS2) {
    version (Windows) {
        alias wchar_t Py_UNICODE;
    } else {
        alias ushort Py_UNICODE;
    }
} else {
    alias uint Py_UNICODE;
}
/** A single code point. Always 32 bits wide, independent of the width of
   Py_UNICODE: aliasing it to Py_UNICODE would shrink it to 16 bits in
   Python_Unicode_UCS2 builds. */
alias uint Py_UCS4;
/// 8-bit code unit (Latin-1 storage).
alias ubyte Py_UCS1;
/// 16-bit code unit (UCS-2 storage).
alias ushort Py_UCS2;

version(Python_3_4_Or_Later) {
    /** There are 4 forms of Unicode strings:
       - compact ascii:
         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string are the data)
       - compact:
         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)
       - legacy string, not ready:
         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0
       - legacy string, ready:
         * structure = PyUnicodeObject structure
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.
       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.
       See also _PyUnicode_CheckConsistency().
       Availability >= 3.4
      */
    struct PyASCIIObject {
        mixin PyObject_HEAD;
        /** Number of code points in the string */
        Py_ssize_t length;
        /** Hash value; -1 if not set */
        Py_hash_t hash;
        /** Packed string state.
           NOTE(review): presumably carries the interned/kind/compact/ascii/ready
           flags listed in the overview above -- confirm the bit layout against
           Include/unicodeobject.h before decoding it. */
        int state;
        /** wchar_t representation (null-terminated) */
        wchar_t* wstr;
    }

    /// Availability >= 3.4
    struct PyCompactUnicodeObject {
        /// Leading PyASCIIObject part of the structure.
        PyASCIIObject _base;
        /// Length of utf8; 0 if utf8 is null (see the overview above).
        Py_ssize_t utf8_length;
        /// UTF-8 representation, or null.
        char* utf8;
        /// Length of wstr; 0 if wstr is null (see the overview above).
        Py_ssize_t wstr_length;
    }

    /**
      subclass of PyObject.

      Layout used by legacy (non-compact) strings: the compact header
      followed by a pointer to the separately allocated character data.
      */
    struct PyUnicodeObject {
        /// Leading PyCompactUnicodeObject part of the structure.
        PyCompactUnicodeObject _base;
        /// Character data; data.any is null for a legacy string that is not ready.
        PyUnicodeObject_data data;
    }

    /// The character buffer of a PyUnicodeObject, viewed at each storage width.
    union PyUnicodeObject_data {
        void* any;
        Py_UCS1* latin1;
        Py_UCS2* ucs2;
        Py_UCS4* ucs4;
    }
}else{
    /**
      subclass of PyObject.
      */
    struct PyUnicodeObject {
        mixin PyObject_HEAD;
        /** Length of raw Unicode data in buffer */
        Py_ssize_t length;
        /** Raw Unicode buffer */
        Py_UNICODE* str;
        /** Hash value; -1 if not set */
        C_long hash;
        /** (Default) Encoded version as Python
           string, or NULL; this is used for
           implementing the buffer protocol */
        PyObject* defenc;
    }
}

/// The type object for Python unicode strings.
mixin(PyAPI_DATA!"PyTypeObject PyUnicode_Type");

// D translations of C macros:
// NOTE(review): the four buffer accessors below read `op.length` / `op.str`,
// which are direct members only of the pre-3.4 PyUnicodeObject declared above.
// They are templates, so they are only compiled when used -- confirm what they
// should do under Python_3_4_Or_Later.

/** Fast access macros */
/// Nonzero if op is an instance of PyUnicode_Type or of a subtype.
int PyUnicode_Check()(PyObject* op) {
    return PyObject_TypeCheck(op, &PyUnicode_Type);
}
/// Nonzero if the type of op is exactly PyUnicode_Type.
int PyUnicode_CheckExact()(PyObject* op) {
    return Py_TYPE(op) == &PyUnicode_Type;
}

/// Value of the object's `length` field.
size_t PyUnicode_GET_SIZE()(PyUnicodeObject* op) {
    return op.length;
}
/// `length` multiplied by the size of one Py_UNICODE, i.e. the buffer size in bytes.
size_t PyUnicode_GET_DATA_SIZE()(PyUnicodeObject* op) {
    return op.length * Py_UNICODE.sizeof;
}
/// The object's internal Py_UNICODE buffer (`str`).
Py_UNICODE* PyUnicode_AS_UNICODE()(PyUnicodeObject* op) {
    return op.str;
}
/// The object's internal buffer (`str`) reinterpreted as read-only bytes.
const(char)* PyUnicode_AS_DATA()(PyUnicodeObject* op) {
    return cast(const(char)*) op.str;
}

/** This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0.
*/
enum Py_UNICODE Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;

/// Prefix under which the interpreter actually exports the Unicode API:
/// unmangled from 3.3 on, otherwise mangled by the build's Unicode width.
version(Python_3_3_Or_Later) {
    enum PyUnicode_ = "PyUnicode_";
}else version(Python_Unicode_UCS2) {
    enum PyUnicode_ = "PyUnicodeUCS2_";
}else{
    enum PyUnicode_ = "PyUnicodeUCS4_";
}

/**
  Rewrites a chunk of declarations so that every function named
  `PyUnicode_XX` (or `_PyUnicode_XX`) is declared under its exported name
  and, when that name differs, aliased back to the unmangled one:
  ---
  T PyUnicodeUCS4_XX(args);
   /// ditto
  alias PyUnicodeUCS4_XX PyUnicode_XX;
  ---
  The prefix is taken from the `PyUnicode_` enum. When the prefix is already
  `PyUnicode_` the declaration is copied unchanged and no alias is emitted,
  because an alias of a name to itself does not compile.

  Block comments are copied through verbatim and are never searched for
  names; block comments inside a parameter list do not take part in
  parenthesis matching. All other text, including whatever follows the last
  declaration, is copied through unchanged. If the input is malformed (an
  unterminated block comment, a name without a parameter list, or a
  declaration without a terminating semicolon) rewriting stops there and the
  rest of the input is copied through as-is.

  Params:
    code = D source text holding the declarations.
  Returns: the rewritten source text.
  */
string substitute_and_alias()(string code) {
    import std.algorithm : startsWith;
    import std.array : join, replace;
    // indexOf yields code-unit offsets, which is what slicing needs;
    // countUntil counts decoded elements instead.
    import std.string : indexOf;

    enum commentOpen = "/*";
    enum commentClose = "*/";

    // True when text begins with something the main loop handles specially:
    // an API name or a block comment.
    static bool atToken(string text) {
        return startsWith(text, "_PyUnicode_")
            || startsWith(text, "PyUnicode_")
            || startsWith(text, commentOpen);
    }

    string[] pieces;
outer:
    while (code.length) {
        // Block comment: copy it whole, including both delimiters.
        if (startsWith(code, commentOpen)) {
            immutable close = indexOf(code[2 .. $], commentClose);
            if (close < 0) break;
            // close is relative to code[2 .. $]; add both 2-char delimiters.
            immutable size_t len = close + 4;
            pieces ~= code[0 .. len];
            code = code[len .. $];
            continue;
        }

        // Plain text: copy everything up to the next name or comment.
        if (!startsWith(code, "PyUnicode_") && !startsWith(code, "_PyUnicode_")) {
            // Offset 0 cannot be a token here, so at least one char is consumed.
            size_t next = 1;
            while (next < code.length && !atToken(code[next .. $])) {
                ++next;
            }
            pieces ~= code[0 .. next];
            code = code[next .. $];
            continue;
        }

        // Function declaration: the name runs up to the opening parenthesis.
        immutable open = indexOf(code, '(');
        if (open < 0) break;
        string aliasName = code[0 .. open];
        string funcName = replace(aliasName, "PyUnicode_", PyUnicode_);

        // Find the end of the parameter list, skipping embedded comments so
        // that parentheses inside them are not counted.
        size_t pos = open + 1;
        int depth = 1;
        while (depth && pos < code.length) {
            if (startsWith(code[pos .. $], commentOpen)) {
                immutable close = indexOf(code[pos + 2 .. $], commentClose);
                if (close < 0) break outer;
                pos += close + 4;
            } else {
                if (code[pos] == '(') {
                    ++depth;
                } else if (code[pos] == ')') {
                    --depth;
                }
                ++pos;
            }
        }
        immutable semi = indexOf(code[pos .. $], ';');
        if (semi < 0) break;
        pos += semi + 1;

        pieces ~= funcName;
        pieces ~= code[open .. pos];
        if (funcName != aliasName) {
            pieces ~= "\n /// ditto \n";
            pieces ~= "\nalias " ~ funcName ~ " " ~ aliasName ~ ";\n";
        }
        code = code[pos .. $];
    }

    // Whatever is left (nothing on a normal exit, the unparsed remainder
    // after a break) is passed through instead of being dropped.
    pieces ~= code;
    return join(pieces);
}

enum string unicode_funs = q{
    version(Python_2_6_Or_Later) {

    /** Create a Unicode Object from the Py_UNICODE buffer u of the given
       size.

       u may be NULL which causes the contents to be undefined. It is the
       user's responsibility to fill in the needed data afterwards. Note
       that modifying the Unicode object contents after construction is
       only allowed if u was set to NULL.

       The buffer is copied into the new object.
*/ 298 /// Availability: >= 2.6 299 PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 300 301 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 302 /// Availability: >= 2.6 303 PyObject* PyUnicode_FromStringAndSize( 304 const(char)*u, /* char buffer */ 305 Py_ssize_t size /* size of buffer */ 306 ); 307 308 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 309 Latin-1 encoded bytes */ 310 /// Availability: >= 2.6 311 PyObject* PyUnicode_FromString( 312 const(char)*u /* string */ 313 ); 314 /// Availability: >= 2.6 315 PyObject* PyUnicode_FromFormatV(const(char)*, va_list); 316 /// Availability: >= 2.6 317 PyObject* PyUnicode_FromFormat(const(char)*, ...); 318 319 /** Format the object based on the format_spec, as defined in PEP 3101 320 (Advanced String Formatting). */ 321 /// Availability: >= 2.6 322 PyObject* _PyUnicode_FormatAdvanced(PyObject *obj, 323 Py_UNICODE *format_spec, 324 Py_ssize_t format_spec_len); 325 /// Availability: >= 2.6 326 int PyUnicode_ClearFreeList(); 327 /** 328 Params: 329 string = UTF-7 encoded string 330 length = size of string 331 error = error handling 332 consumed = bytes consumed 333 */ 334 /// Availability: >= 2.6 335 PyObject* PyUnicode_DecodeUTF7Stateful( 336 const(char)* string, 337 Py_ssize_t length, 338 const(char)*errors, 339 Py_ssize_t *consumed 340 ); 341 /** 342 Params: 343 string = UTF-32 encoded string 344 length = size of string 345 error = error handling 346 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 347 */ 348 /// Availability: >= 2.6 349 PyObject* PyUnicode_DecodeUTF32( 350 const(char)* string, 351 Py_ssize_t length, 352 const(char)*errors, 353 int *byteorder 354 ); 355 356 /** 357 Params: 358 string = UTF-32 encoded string 359 length = size of string 360 error = error handling 361 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 362 */ 363 /// Availability: >= 2.6 364 PyObject* 
PyUnicode_DecodeUTF32Stateful( 365 const(char)*string, 366 Py_ssize_t length, 367 const(char)*errors, 368 int *byteorder, 369 Py_ssize_t *consumed 370 ); 371 /** Returns a Python string using the UTF-32 encoding in native byte 372 order. The string always starts with a BOM mark. */ 373 /// Availability: >= 2.6 374 375 PyObject* PyUnicode_AsUTF32String( 376 PyObject *unicode 377 ); 378 379 /** Returns a Python string object holding the UTF-32 encoded value of 380 the Unicode data. 381 382 If byteorder is not 0, output is written according to the following 383 byte order: 384 385 byteorder == -1: little endian 386 byteorder == 0: native byte order (writes a BOM mark) 387 byteorder == 1: big endian 388 389 If byteorder is 0, the output string will always start with the 390 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 391 prepended. 392 Params: 393 data = Unicode char buffer 394 length = number of Py_UNICODE chars to encode 395 errors = error handling 396 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 397 398 */ 399 /// Availability: >= 2.6 400 PyObject* PyUnicode_EncodeUTF32( 401 const Py_UNICODE *data, 402 Py_ssize_t length, 403 const(char)* errors, 404 int byteorder 405 ); 406 } 407 408 /** Return a read-only pointer to the Unicode object's internal 409 Py_UNICODE buffer. */ 410 Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode); 411 /** Get the length of the Unicode object. */ 412 Py_ssize_t PyUnicode_GetSize(PyObject* unicode); 413 414 /** Get the maximum ordinal for a Unicode character. */ 415 Py_UNICODE PyUnicode_GetMax(); 416 417 /** Resize an already allocated Unicode object to the new size length. 418 419 _*unicode is modified to point to the new (resized) object and 0 420 returned on success. 421 422 This API may only be called by the function which also called the 423 Unicode constructor. The refcount on the object must be 1. Otherwise, 424 an error is returned. 
425 426 Error handling is implemented as follows: an exception is set, -1 427 is returned and *unicode left untouched. 428 Params: 429 unicode = pointer to the new unicode object. 430 length = New length. 431 432 */ 433 int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length); 434 /** Coerce obj to a Unicode object and return a reference with 435 _*incremented* refcount. 436 437 Coercion is done in the following way: 438 439 1. String and other char buffer compatible objects are decoded 440 under the assumptions that they contain data using the current 441 default encoding. Decoding is done in "strict" mode. 442 443 2. All other objects (including Unicode objects) raise an 444 exception. 445 446 The API returns NULL in case of an error. The caller is responsible 447 for decref'ing the returned objects. 448 449 */ 450 PyObject* PyUnicode_FromEncodedObject( 451 PyObject* obj, 452 const(char)* encoding, 453 const(char)* errors); 454 455 /** Coerce obj to a Unicode object and return a reference with 456 _*incremented* refcount. 457 458 Unicode objects are passed back as-is (subclasses are converted to 459 true Unicode objects), all other objects are delegated to 460 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 461 using the default encoding as basis for decoding the object. 462 463 The API returns NULL in case of an error. The caller is responsible 464 for decref'ing the returned objects. 465 466 */ 467 PyObject* PyUnicode_FromObject(PyObject* obj); 468 469 /** Create a Unicode Object from the wchar_t buffer w of the given 470 size. 471 472 The buffer is copied into the new object. */ 473 PyObject* PyUnicode_FromWideChar(const(wchar_t)* w, Py_ssize_t size); 474 475 /** Copies the Unicode Object contents into the wchar_t buffer w. At 476 most size wchar_t characters are copied. 477 478 Note that the resulting wchar_t string may or may not be 479 0-terminated. 
It is the responsibility of the caller to make sure 480 that the wchar_t string is 0-terminated in case this is required by 481 the application. 482 483 Returns the number of wchar_t characters copied (excluding a 484 possibly trailing 0-termination character) or -1 in case of an 485 error. */ 486 Py_ssize_t PyUnicode_AsWideChar( 487 PyUnicodeObject* unicode, 488 const(wchar_t)* w, 489 Py_ssize_t size); 490 491 /** Create a Unicode Object from the given Unicode code point ordinal. 492 493 The ordinal must be in range(0x10000) on narrow Python builds 494 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 495 raised in case it is not. 496 497 */ 498 PyObject* PyUnicode_FromOrdinal(int ordinal); 499 500 /** Return a Python string holding the default encoded value of the 501 Unicode object. 502 503 The resulting string is cached in the Unicode object for subsequent 504 usage by this function. The cached version is needed to implement 505 the character buffer interface and will live (at least) as long as 506 the Unicode object itself. 507 508 The refcount of the string is *not* incremented. 509 510 _*** Exported for internal use by the interpreter only !!! *** 511 512 */ 513 PyObject* _PyUnicode_AsDefaultEncodedString(PyObject *, const(char)*); 514 515 /** Returns the currently active default encoding. 516 517 The default encoding is currently implemented as run-time settable 518 process global. This may change in future versions of the 519 interpreter to become a parameter which is managed on a per-thread 520 basis. 521 522 */ 523 const(char)* PyUnicode_GetDefaultEncoding(); 524 525 /** Sets the currently active default encoding. 526 527 Returns 0 on success, -1 in case of an error. 528 529 */ 530 int PyUnicode_SetDefaultEncoding(const(char)*encoding); 531 532 /** Create a Unicode object by decoding the encoded string s of the 533 given size. 
534 Params: 535 s = encoded string 536 size = size of buffer 537 encoding = encoding 538 errors = error handling 539 */ 540 PyObject* PyUnicode_Decode( 541 const(char)* s, 542 Py_ssize_t size, 543 const(char)* encoding, 544 const(char)* errors); 545 546 version(Python_3_6_Or_Later) { 547 /** Decode a Unicode object unicode and return the result as Python 548 object. */ 549 /// Deprecated in 3.6 550 deprecated("Deprecated in 3.6") 551 PyObject* PyUnicode_AsDecodedObject( 552 PyObject* unicode, 553 const(char)* encoding, 554 const(char)* errors 555 ); 556 /** Decode a Unicode object unicode and return the result as Unicode 557 object. */ 558 /// Availability: 3.* 559 560 /// Deprecated in 3.6 561 deprecated("Deprecated in 3.6") 562 PyObject* PyUnicode_AsDecodedUnicode( 563 PyObject* unicode, 564 const(char)* encoding, 565 const(char)* errors 566 ); 567 }else version(Python_3_0_Or_Later) { 568 /** Decode a Unicode object unicode and return the result as Python 569 object. */ 570 /// Availability: 3.* 571 PyObject* PyUnicode_AsDecodedObject( 572 PyObject* unicode, 573 const(char)* encoding, 574 const(char)* errors 575 ); 576 /** Decode a Unicode object unicode and return the result as Unicode 577 object. */ 578 /// Availability: 3.* 579 580 PyObject* PyUnicode_AsDecodedUnicode( 581 PyObject* unicode, 582 const(char)* encoding, 583 const(char)* errors 584 ); 585 } 586 587 /** Encodes a Py_UNICODE buffer of the given size and returns a 588 Python string object. 589 Params: 590 s = Unicode char buffer 591 size = number of Py_UNICODE chars to encode 592 encoding = encoding 593 errors = error handling 594 */ 595 PyObject* PyUnicode_Encode( 596 Py_UNICODE* s, 597 Py_ssize_t size, 598 const(char)* encoding, 599 const(char)* errors); 600 601 version(Python_3_6_Or_Later) { 602 /** Encodes a Unicode object and returns the result as Python object. 
*/
    deprecated("Deprecated in 3.6")
    PyObject* PyUnicode_AsEncodedObject(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);
    }else{
    /** Encodes a Unicode object and returns the result as Python object.
     */
    PyObject* PyUnicode_AsEncodedObject(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);
    }

    /** Encodes a Unicode object and returns the result as Python string
       object. */
    PyObject* PyUnicode_AsEncodedString(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);

    // The deprecated declaration applies from 3.6 on only; 3.0 - 3.5 use the
    // plain declaration in the else branch.
    version(Python_3_6_Or_Later) {
    /** Encodes a Unicode object and returns the result as Unicode
       object. */
    deprecated("Deprecated in 3.6")
    PyObject* PyUnicode_AsEncodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    }else version(Python_3_0_Or_Later) {
    /** Encodes a Unicode object and returns the result as Unicode
       object. */
    /// Availability: >= 3.*
    PyObject* PyUnicode_AsEncodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    }

    /**
    Params:
    string = UTF-7 encoded string
    length = size of string
    errors = error handling
    */
    PyObject* PyUnicode_DecodeUTF7(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    encodeSetO = Encode RFC2152 Set O characters in base64
    encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
    errors = error handling
    */
    PyObject* PyUnicode_EncodeUTF7(
            Py_UNICODE* data,
            Py_ssize_t length,
            int encodeSetO,
            int encodeWhiteSpace,
            const(char)* errors
            );

    /// _
    PyObject* PyUnicode_DecodeUTF8(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);
    /// _
    PyObject* PyUnicode_DecodeUTF8Stateful(
            const(char)*
string, 680 Py_ssize_t length, 681 const(char)* errors, 682 Py_ssize_t* consumed 683 ); 684 /// _ 685 PyObject* PyUnicode_AsUTF8String(PyObject* unicode); 686 /// _ 687 PyObject* PyUnicode_EncodeUTF8( 688 Py_UNICODE* data, 689 Py_ssize_t length, 690 const(char) *errors); 691 692 /** Decodes length bytes from a UTF-16 encoded buffer string and returns 693 the corresponding Unicode object. 694 695 errors (if non-NULL) defines the error handling. It defaults 696 to "strict". 697 698 If byteorder is non-NULL, the decoder starts decoding using the 699 given byte order: 700 701 *byteorder == -1: little endian 702 *byteorder == 0: native order 703 *byteorder == 1: big endian 704 705 In native mode, the first two bytes of the stream are checked for a 706 BOM mark. If found, the BOM mark is analysed, the byte order 707 adjusted and the BOM skipped. In the other modes, no BOM mark 708 interpretation is done. After completion, *byteorder is set to the 709 current byte order at the end of input data. 710 711 If byteorder is NULL, the codec starts in native order mode. 712 713 */ 714 PyObject* PyUnicode_DecodeUTF16( 715 const(char)* string, 716 Py_ssize_t length, 717 const(char)* errors, 718 int* byteorder); 719 /** 720 Params: 721 string = UTF-16 encoded string 722 length = size of string 723 errors = error handling 724 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 725 consumed = bytes consumed 726 */ 727 PyObject* PyUnicode_DecodeUTF16Stateful( 728 const(char)* string, 729 Py_ssize_t length, 730 const(char)* errors, 731 int* byteorder, 732 Py_ssize_t* consumed 733 ); 734 /** Returns a Python string using the UTF-16 encoding in native byte 735 order. The string always starts with a BOM mark. */ 736 PyObject* PyUnicode_AsUTF16String(PyObject *unicode); 737 /** Returns a Python string object holding the UTF-16 encoded value of 738 the Unicode data. 
739 740 If byteorder is not 0, output is written according to the following 741 byte order: 742 743 byteorder == -1: little endian 744 byteorder == 0: native byte order (writes a BOM mark) 745 byteorder == 1: big endian 746 747 If byteorder is 0, the output string will always start with the 748 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 749 prepended. 750 751 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 752 UCS-2. This trick makes it possible to add full UTF-16 capabilities 753 at a later point without compromising the APIs. 754 755 */ 756 PyObject* PyUnicode_EncodeUTF16( 757 Py_UNICODE* data, 758 Py_ssize_t length, 759 const(char)* errors, 760 int byteorder 761 ); 762 763 /// _ 764 PyObject* PyUnicode_DecodeUnicodeEscape( 765 const(char)* string, 766 Py_ssize_t length, 767 const(char)* errors); 768 /// _ 769 PyObject* PyUnicode_AsUnicodeEscapeString( 770 PyObject* unicode); 771 /// _ 772 PyObject* PyUnicode_EncodeUnicodeEscape( 773 Py_UNICODE* data, 774 Py_ssize_t length); 775 /** 776 Params: 777 string = Raw-Unicode-Escape encoded string 778 length = size of string 779 errors = error handling 780 */ 781 PyObject* PyUnicode_DecodeRawUnicodeEscape( 782 const(char)* string, 783 Py_ssize_t length, 784 const(char)* errors); 785 /// _ 786 PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode); 787 /// _ 788 PyObject* PyUnicode_EncodeRawUnicodeEscape( 789 Py_UNICODE* data, Py_ssize_t length); 790 791 /// _ 792 PyObject* _PyUnicode_DecodeUnicodeInternal( 793 const(char)* string, 794 Py_ssize_t length, 795 const(char)* errors); 796 797 /** 798 Params: 799 string = Latin-1 encoded string 800 length = size of string 801 errors = error handling 802 */ 803 PyObject* PyUnicode_DecodeLatin1( 804 const(char)* string, 805 Py_ssize_t length, 806 const(char)* errors); 807 /// _ 808 PyObject* PyUnicode_AsLatin1String(PyObject *unicode); 809 /** 810 Params: 811 data = Unicode char buffer 812 length = Number of Py_UNICODE chars to 
encode 813 errors = error handling 814 */ 815 PyObject* PyUnicode_EncodeLatin1( 816 Py_UNICODE* data, 817 Py_ssize_t length, 818 const(char)* errors); 819 820 /** 821 Params: 822 data = Unicode char buffer 823 length = Number of Py_UNICODE chars to encode 824 errors = error handling 825 */ 826 PyObject* PyUnicode_DecodeASCII( 827 const(char)* string, 828 Py_ssize_t length, 829 const(char)* errors); 830 /// _ 831 PyObject* PyUnicode_AsASCIIString(PyObject *unicode); 832 /** 833 Params: 834 data = Unicode char buffer 835 length = Number of Py_UNICODE chars to encode 836 errors = error handling 837 */ 838 PyObject* PyUnicode_EncodeASCII( 839 Py_UNICODE* data, 840 Py_ssize_t length, 841 const(char)* errors); 842 843 /** 844 Params: 845 string = Encoded string 846 length = size of string 847 mapping = character mapping (char ordinal -> unicode ordinal) 848 errors = error handling 849 */ 850 PyObject* PyUnicode_DecodeCharmap( 851 const(char)* string, 852 Py_ssize_t length, 853 PyObject* mapping, 854 const(char)* errors 855 ); 856 /** 857 Params: 858 unicode = Unicode object 859 mapping = character mapping (unicode ordinal -> char ordinal) 860 */ 861 PyObject* PyUnicode_AsCharmapString( 862 PyObject* unicode, 863 PyObject* mapping); 864 /** 865 Params: 866 data = Unicode char buffer 867 length = Number of Py_UNICODE chars to encode 868 mapping = character mapping (unicode ordinal -> char ordinal) 869 errors = error handling 870 */ 871 PyObject* PyUnicode_EncodeCharmap( 872 Py_UNICODE* data, 873 Py_ssize_t length, 874 PyObject* mapping, 875 const(char)* errors 876 ); 877 /** Translate a Py_UNICODE buffer of the given length by applying a 878 character mapping table to it and return the resulting Unicode 879 object. 880 881 The mapping table must map Unicode ordinal integers to Unicode 882 ordinal integers or None (causing deletion of the character). 883 884 Mapping tables may be dictionaries or sequences. 
Unmapped character 885 ordinals (ones which cause a LookupError) are left untouched and 886 are copied as-is. 887 888 */ 889 PyObject* PyUnicode_TranslateCharmap( 890 Py_UNICODE* data, 891 Py_ssize_t length, 892 PyObject* table, 893 const(char)* errors 894 ); 895 896 version (Windows) { 897 /// Availability: Windows only 898 PyObject* PyUnicode_DecodeMBCS( 899 const(char)* string, 900 Py_ssize_t length, 901 const(char)* errors); 902 /// Availability: Windows only 903 PyObject* PyUnicode_AsMBCSString(PyObject* unicode); 904 /// Availability: Windows only 905 PyObject* PyUnicode_EncodeMBCS( 906 Py_UNICODE* data, 907 Py_ssize_t length, 908 const(char)* errors); 909 } 910 /** Takes a Unicode string holding a decimal value and writes it into 911 an output buffer using standard ASCII digit codes. 912 913 The output buffer has to provide at least length+1 bytes of storage 914 area. The output string is 0-terminated. 915 916 The encoder converts whitespace to ' ', decimal characters to their 917 corresponding ASCII digit and all other Latin-1 characters except 918 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 919 are treated as errors. This includes embedded NULL bytes. 920 921 Error handling is defined by the errors argument: 922 923 NULL or "strict": raise a ValueError 924 "ignore": ignore the wrong characters (these are not copied to the 925 output buffer) 926 "replace": replaces illegal characters with '?' 927 928 Returns 0 on success, -1 on failure. 929 930 */ 931 int PyUnicode_EncodeDecimal( 932 Py_UNICODE* s, 933 Py_ssize_t length, 934 char* output, 935 const(char)* errors); 936 937 /** Concat two strings giving a new Unicode string. 
*/ 938 PyObject* PyUnicode_Concat( 939 PyObject* left, 940 PyObject* right); 941 942 version(Python_3_0_Or_Later) { 943 /** Concat two strings and put the result in *pleft 944 (sets *pleft to NULL on error) 945 Params: 946 pleft = Pointer to left string 947 right = Right string 948 */ 949 /// Availability: 3.* 950 951 void PyUnicode_Append( 952 PyObject** pleft, 953 PyObject* right 954 ); 955 956 /** Concat two strings, put the result in *pleft and drop the right object 957 (sets *pleft to NULL on error) 958 Params: 959 pleft = Pointer to left string 960 */ 961 /// Availability: 3.* 962 void PyUnicode_AppendAndDel( 963 PyObject** pleft, 964 PyObject* right 965 ); 966 } 967 968 /** Split a string giving a list of Unicode strings. 969 970 If sep is NULL, splitting will be done at all whitespace 971 substrings. Otherwise, splits occur at the given separator. 972 973 At most maxsplit splits will be done. If negative, no limit is set. 974 975 Separators are not included in the resulting list. 976 977 */ 978 PyObject* PyUnicode_Split( 979 PyObject* s, 980 PyObject* sep, 981 Py_ssize_t maxsplit); 982 983 /** Ditto PyUnicode_Split, but split at line breaks. 984 985 CRLF is considered to be one line break. Line breaks are not 986 included in the resulting list. */ 987 PyObject* PyUnicode_Splitlines( 988 PyObject* s, 989 int keepends); 990 991 version(Python_2_5_Or_Later) { 992 /** Partition a string using a given separator. */ 993 /// Availability: >= 2.5 994 PyObject* PyUnicode_Partition( 995 PyObject* s, 996 PyObject* sep 997 ); 998 999 /** Partition a string using a given separator, searching from the end 1000 of the string. */ 1001 1002 PyObject* PyUnicode_RPartition( 1003 PyObject* s, 1004 PyObject* sep 1005 ); 1006 } 1007 1008 /** Split a string giving a list of Unicode strings. 1009 1010 If sep is NULL, splitting will be done at all whitespace 1011 substrings. Otherwise, splits occur at the given separator. 1012 1013 At most maxsplit splits will be done. 
But unlike PyUnicode_Split 1014 PyUnicode_RSplit splits from the end of the string. If negative, 1015 no limit is set. 1016 1017 Separators are not included in the resulting list. 1018 1019 */ 1020 PyObject* PyUnicode_RSplit( 1021 PyObject* s, 1022 PyObject* sep, 1023 Py_ssize_t maxsplit); 1024 1025 /** Translate a string by applying a character mapping table to it and 1026 return the resulting Unicode object. 1027 1028 The mapping table must map Unicode ordinal integers to Unicode 1029 ordinal integers or None (causing deletion of the character). 1030 1031 Mapping tables may be dictionaries or sequences. Unmapped character 1032 ordinals (ones which cause a LookupError) are left untouched and 1033 are copied as-is. 1034 1035 */ 1036 PyObject* PyUnicode_Translate( 1037 PyObject* str, 1038 PyObject* table, 1039 const(char)* errors); 1040 1041 /** Join a sequence of strings using the given separator and return 1042 the resulting Unicode string. */ 1043 PyObject* PyUnicode_Join( 1044 PyObject* separator, 1045 PyObject* seq); 1046 1047 /** Return 1 if substr matches str[start:end] at the given tail end, 0 1048 otherwise. */ 1049 Py_ssize_t PyUnicode_Tailmatch( 1050 PyObject* str, 1051 PyObject* substr, 1052 Py_ssize_t start, 1053 Py_ssize_t end, 1054 int direction 1055 ); 1056 1057 /** Return the first position of substr in str[start:end] using the 1058 given search direction or -1 if not found. -2 is returned in case 1059 an error occurred and an exception is set. */ 1060 Py_ssize_t PyUnicode_Find( 1061 PyObject* str, 1062 PyObject* substr, 1063 Py_ssize_t start, 1064 Py_ssize_t end, 1065 int direction 1066 ); 1067 1068 /** Count the number of occurrences of substr in str[start:end]. */ 1069 Py_ssize_t PyUnicode_Count( 1070 PyObject* str, 1071 PyObject* substr, 1072 Py_ssize_t start, 1073 Py_ssize_t end); 1074 1075 /** Replace at most maxcount occurrences of substr in str with replstr 1076 and return the resulting Unicode object. 
*/ 1077 PyObject* PyUnicode_Replace( 1078 PyObject* str, 1079 PyObject* substr, 1080 PyObject* replstr, 1081 Py_ssize_t maxcount 1082 ); 1083 1084 /** Compare two strings and return -1, 0, 1 for less than, equal, 1085 greater than resp. */ 1086 int PyUnicode_Compare(PyObject* left, PyObject* right); 1087 version(Python_3_0_Or_Later) { 1088 /** Compare two strings and return -1, 0, 1 for less than, equal, 1089 greater than resp. 1090 Params: 1091 left = 1092 right = ASCII-encoded string 1093 */ 1094 /// Availability: 3.* 1095 int PyUnicode_CompareWithASCIIString( 1096 PyObject* left, 1097 const(char)* right 1098 ); 1099 } 1100 1101 version(Python_2_5_Or_Later) { 1102 /** Rich compare two strings and return one of the following: 1103 1104 - NULL in case an exception was raised 1105 - Py_True or Py_False for successful comparisons 1106 - Py_NotImplemented in case the type combination is unknown 1107 1108 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 1109 case the conversion of the arguments to Unicode fails with a 1110 UnicodeDecodeError. 1111 1112 Possible values for op: 1113 1114 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 1115 1116 */ 1117 /// Availability: >= 2.5 1118 PyObject* PyUnicode_RichCompare( 1119 PyObject* left, 1120 PyObject* right, 1121 int op 1122 ); 1123 } 1124 1125 /** Apply an argument tuple or dictionary to a format string and return 1126 the resulting Unicode string. */ 1127 PyObject* PyUnicode_Format(PyObject* format, PyObject* args); 1128 1129 /** Checks whether element is contained in container and return 1/0 1130 accordingly. 1131 1132 element has to coerce to a one-element Unicode string. -1 is 1133 returned in case of an error. */ 1134 int PyUnicode_Contains(PyObject* container, PyObject* element); 1135 1136 version(Python_3_0_Or_Later) { 1137 /** Checks whether argument is a valid identifier. 
*/ 1138 /// Availability: 3.* 1139 int PyUnicode_IsIdentifier(PyObject* s); 1140 } 1141 1142 1143 /// _ 1144 int _PyUnicode_IsLowercase(Py_UNICODE ch); 1145 /// _ 1146 int _PyUnicode_IsUppercase(Py_UNICODE ch); 1147 /// _ 1148 int _PyUnicode_IsTitlecase(Py_UNICODE ch); 1149 /// _ 1150 int _PyUnicode_IsWhitespace(Py_UNICODE ch); 1151 /// _ 1152 int _PyUnicode_IsLinebreak(Py_UNICODE ch); 1153 /// _ 1154 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch); 1155 /// _ 1156 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch); 1157 /// _ 1158 Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch); 1159 /// _ 1160 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch); 1161 /// _ 1162 int _PyUnicode_ToDigit(Py_UNICODE ch); 1163 /// _ 1164 double _PyUnicode_ToNumeric(Py_UNICODE ch); 1165 /// _ 1166 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch); 1167 /// _ 1168 int _PyUnicode_IsDigit(Py_UNICODE ch); 1169 /// _ 1170 int _PyUnicode_IsNumeric(Py_UNICODE ch); 1171 /// _ 1172 int _PyUnicode_IsAlpha(Py_UNICODE ch); 1173 1174 }; 1175 1176 /* 1177 pragma(msg,substitute_and_alias(unicode_funs)); 1178 mixin(substitute_and_alias(unicode_funs)); 1179 */ 1180 1181 // waaaa! calling substitute_and_alias breaks linking! 1182 // oh, well. this is probably faster anyways. 1183 // following code is generated by substitute_and_alias. 1184 // don't modify it; modify unicode_funs! 1185 version(Python_3_3_Or_Later) { 1186 version(Python_2_6_Or_Later) { 1187 1188 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 1189 size. 1190 1191 u may be NULL which causes the contents to be undefined. It is the 1192 user's responsibility to fill in the needed data afterwards. Note 1193 that modifying the Unicode object contents after construction is 1194 only allowed if u was set to NULL. 1195 1196 The buffer is copied into the new object. 
*/
        /// Availability: >= 2.6
        PyObject* PyUnicode_FromUnicode(
                Py_UNICODE* u,
                Py_ssize_t size);

        /** Counterpart of PyUnicode_FromUnicode() for a char buffer: u points
          to Latin-1 encoded bytes.

          NOTE(review): the CPython C-API documentation describes this buffer
          as UTF-8 encoded rather than Latin-1 -- confirm and correct.
          Params:
          u = char buffer
          size = size of buffer
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_FromStringAndSize(
                const(char)* u,
                Py_ssize_t size);

        /** Counterpart of PyUnicode_FromUnicode() for a null-terminated char
          buffer: u points to null-terminated Latin-1 encoded bytes.

          NOTE(review): the CPython C-API documentation describes this buffer
          as UTF-8 encoded rather than Latin-1 -- confirm and correct.
          Params:
          u = string
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_FromString(const(char)* u);

        /// Availability: >= 2.6
        PyObject* PyUnicode_FromFormatV(const(char)*, va_list);

        /// Availability: >= 2.6
        PyObject* PyUnicode_FromFormat(const(char)*, ...);

        /** Formats obj according to format_spec, as defined in PEP 3101
          (Advanced String Formatting). */
        /// Availability: >= 2.6
        PyObject* _PyUnicode_FormatAdvanced(
                PyObject* obj,
                Py_UNICODE* format_spec,
                Py_ssize_t format_spec_len);

        /// Availability: >= 2.6
        int PyUnicode_ClearFreeList();

        /**
          Params:
          string = UTF-7 encoded string
          length = size of string
          errors = error handling
          consumed = bytes consumed
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_DecodeUTF7Stateful(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                Py_ssize_t* consumed);

        /**
          Params:
          string = UTF-32 encoded string
          length = size of string
          errors = error handling
          byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_DecodeUTF32(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                int* byteorder);

        /**
          Params:
          string = UTF-32 encoded string
          length = size of string
          errors = error handling
          byteorder = pointer to byteorder to use
0=native;-1=LE,1=BE; updated on exit
          consumed = bytes consumed
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_DecodeUTF32Stateful(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                int* byteorder,
                Py_ssize_t* consumed);

        /** Returns a Python string using the UTF-32 encoding in native byte
          order. The string always starts with a BOM mark. */
        /// Availability: >= 2.6
        PyObject* PyUnicode_AsUTF32String(PyObject* unicode);

        /** Returns a Python string object holding the UTF-32 encoded value of
          the Unicode data.

          The output byte order is selected by byteorder:

          byteorder == -1: little endian
          byteorder ==  0: native byte order (writes a BOM mark)
          byteorder ==  1: big endian

          If byteorder is 0, the output string will always start with the
          Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
          prepended.
          Params:
          data = Unicode char buffer
          length = number of Py_UNICODE chars to encode
          errors = error handling
          byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
          */
        /// Availability: >= 2.6
        PyObject* PyUnicode_EncodeUTF32(
                const Py_UNICODE* data,
                Py_ssize_t length,
                const(char)* errors,
                int byteorder);

    }

    /** Return a read-only pointer to the Unicode object's internal
      Py_UNICODE buffer. */
    Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode);

    /** Get the length of the Unicode object. */
    Py_ssize_t PyUnicode_GetSize(PyObject* unicode);

    /** Get the maximum ordinal for a Unicode character. */
    Py_UNICODE PyUnicode_GetMax();

    /** Resize an already allocated Unicode object to the new size length.

      _*unicode is modified to point to the new (resized) object and 0
      returned on success.
This API may only be called by the function which also called the
      Unicode constructor. The refcount on the object must be 1. Otherwise,
      an error is returned.

      Error handling is implemented as follows: an exception is set, -1
      is returned and *unicode left untouched.
      Params:
      unicode = pointer to the new unicode object.
      length = New length.
      */
    int PyUnicode_Resize(
            PyObject** unicode,
            Py_ssize_t length);

    /** Coerce obj to a Unicode object and return a reference with
      _*incremented* refcount.

      Coercion is done in the following way:

      1. String and other char buffer compatible objects are decoded
      under the assumptions that they contain data using the current
      default encoding. Decoding is done in "strict" mode.

      2. All other objects (including Unicode objects) raise an
      exception.

      The API returns NULL in case of an error. The caller is responsible
      for decref'ing the returned objects.
      */
    PyObject* PyUnicode_FromEncodedObject(
            PyObject* obj,
            const(char)* encoding,
            const(char)* errors);

    /** Coerce obj to a Unicode object and return a reference with
      _*incremented* refcount.

      Unicode objects are passed back as-is (subclasses are converted to
      true Unicode objects), all other objects are delegated to
      PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
      using the default encoding as basis for decoding the object.

      The API returns NULL in case of an error. The caller is responsible
      for decref'ing the returned objects.
      */
    PyObject* PyUnicode_FromObject(PyObject* obj);

    /** Create a Unicode Object from the wchar_t buffer w of the given
      size.

      The buffer is copied into the new object.
*/
    // The buffer is declared with wchar_t (core.stdc.stddef), not D's built-in
    // wchar: wchar is always a 16-bit code unit, whereas the documented
    // parameter is the platform's wchar_t. This also matches the declaration
    // used in the UCS2 branch of this file.
    PyObject* PyUnicode_FromWideChar(
            const(wchar_t)* w,
            Py_ssize_t size);

    /** Copies the Unicode Object contents into the wchar_t buffer w. At
      most size wchar_t characters are copied.

      Note that the resulting wchar_t string may or may not be
      0-terminated. It is the responsibility of the caller to make sure
      that the wchar_t string is 0-terminated in case this is required by
      the application.

      Returns the number of wchar_t characters copied (excluding a
      possibly trailing 0-termination character) or -1 in case of an
      error.

      NOTE(review): w is documented as the destination buffer, so the const
      qualifier looks wrong; it is kept here to stay in line with the UCS2
      branch declaration -- confirm before relaxing it.
      Params:
      unicode = Unicode object to copy from
      w = wchar_t buffer that receives the characters
      size = maximum number of wchar_t characters to copy
      */
    Py_ssize_t PyUnicode_AsWideChar(
            PyUnicodeObject* unicode,
            const(wchar_t)* w,
            Py_ssize_t size);

    /** Create a Unicode Object from the given Unicode code point ordinal.

      The ordinal must be in range(0x10000) on narrow Python builds
      (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
      raised in case it is not.
      */
    PyObject* PyUnicode_FromOrdinal(int ordinal);

    /** Return a Python string holding the default encoded value of the
      Unicode object.

      The resulting string is cached in the Unicode object for subsequent
      usage by this function. The cached version is needed to implement
      the character buffer interface and will live (at least) as long as
      the Unicode object itself.

      The refcount of the string is *not* incremented.

      _*** Exported for internal use by the interpreter only !!! ***

      */
    PyObject* _PyUnicode_AsDefaultEncodedString(PyObject*, const(char)*);

    /** Returns the currently active default encoding.

      The default encoding is currently implemented as run-time settable
      process global. This may change in future versions of the
      interpreter to become a parameter which is managed on a per-thread
      basis.
*/
    const(char)* PyUnicode_GetDefaultEncoding();

    /** Sets the currently active default encoding.

      Returns 0 on success, -1 in case of an error.
      */
    int PyUnicode_SetDefaultEncoding(const(char)* encoding);

    /** Create a Unicode object by decoding the encoded string s of the
      given size.
      Params:
      s = encoded string
      size = size of buffer
      encoding = encoding
      errors = error handling
      */
    PyObject* PyUnicode_Decode(
            const(char)* s,
            Py_ssize_t size,
            const(char)* encoding,
            const(char)* errors);

    version(Python_3_0_Or_Later) {
        /** Decode a Unicode object unicode and return the result as Python
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsDecodedObject(
                PyObject* unicode,
                const(char)* encoding,
                const(char)* errors);

        /** Decode a Unicode object unicode and return the result as Unicode
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsDecodedUnicode(
                PyObject* unicode,
                const(char)* encoding,
                const(char)* errors);

    }

    /** Encodes a Py_UNICODE buffer of the given size and returns a
      Python string object.
      Params:
      s = Unicode char buffer
      size = number of Py_UNICODE chars to encode
      encoding = encoding
      errors = error handling
      */
    PyObject* PyUnicode_Encode(
            Py_UNICODE* s,
            Py_ssize_t size,
            const(char)* encoding,
            const(char)* errors);

    /** Encodes a Unicode object and returns the result as Python object.
      */
    PyObject* PyUnicode_AsEncodedObject(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);

    /** Encodes a Unicode object and returns the result as Python string
      object.
*/
    PyObject* PyUnicode_AsEncodedString(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors);

    version(Python_3_0_Or_Later) {
        /** Encodes a Unicode object and returns the result as Unicode
          object. */
        /// Availability: 3.*
        PyObject* PyUnicode_AsEncodedUnicode(
                PyObject* unicode,
                const(char)* encoding,
                const(char)* errors);

    }

    /**
      Params:
      string = UTF-7 encoded string
      length = size of string
      errors = error handling
      */
    PyObject* PyUnicode_DecodeUTF7(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
      Params:
      data = Unicode char buffer
      length = number of Py_UNICODE chars to encode
      encodeSetO = Encode RFC2152 Set O characters in base64
      encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
      errors = error handling
      */
    PyObject* PyUnicode_EncodeUTF7(
            Py_UNICODE* data,
            Py_ssize_t length,
            int encodeSetO,
            int encodeWhiteSpace,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_DecodeUTF8(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_DecodeUTF8Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            Py_ssize_t* consumed);

    /// _
    PyObject* PyUnicode_AsUTF8String(PyObject* unicode);

    /// _
    PyObject* PyUnicode_EncodeUTF8(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);

    /** Decodes length bytes from a UTF-16 encoded buffer string and returns
      the corresponding Unicode object.

      errors (if non-NULL) defines the error handling. It defaults
      to "strict".
If byteorder is non-NULL, the decoder starts decoding using the
      given byte order:

      _*byteorder == -1: little endian
      _*byteorder ==  0: native order
      _*byteorder ==  1: big endian

      In native mode, the first two bytes of the stream are checked for a
      BOM mark. If found, the BOM mark is analysed, the byte order
      adjusted and the BOM skipped. In the other modes, no BOM mark
      interpretation is done. After completion, *byteorder is set to the
      current byte order at the end of input data.

      If byteorder is NULL, the codec starts in native order mode.
      */
    PyObject* PyUnicode_DecodeUTF16(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder);

    /**
      Params:
      string = UTF-16 encoded string
      length = size of string
      errors = error handling
      byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
      consumed = bytes consumed
      */
    PyObject* PyUnicode_DecodeUTF16Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder,
            Py_ssize_t* consumed);

    /** Returns a Python string using the UTF-16 encoding in native byte
      order. The string always starts with a BOM mark. */
    PyObject* PyUnicode_AsUTF16String(PyObject* unicode);

    /** Returns a Python string object holding the UTF-16 encoded value of
      the Unicode data.

      The output byte order is selected by byteorder:

      byteorder == -1: little endian
      byteorder ==  0: native byte order (writes a BOM mark)
      byteorder ==  1: big endian

      If byteorder is 0, the output string will always start with the
      Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
      prepended.

      Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
      UCS-2.
This trick makes it possible to add full UTF-16 capabilities
      at a later point without compromising the APIs.
      */
    PyObject* PyUnicode_EncodeUTF16(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors,
            int byteorder);

    /// _
    PyObject* PyUnicode_DecodeUnicodeEscape(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsUnicodeEscapeString(PyObject* unicode);

    /// _
    PyObject* PyUnicode_EncodeUnicodeEscape(
            Py_UNICODE* data,
            Py_ssize_t length);

    /**
      Params:
      string = Raw-Unicode-Escape encoded string
      length = size of string
      errors = error handling
      */
    PyObject* PyUnicode_DecodeRawUnicodeEscape(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode);

    /// _
    PyObject* PyUnicode_EncodeRawUnicodeEscape(
            Py_UNICODE* data,
            Py_ssize_t length);

    /// _
    PyObject* _PyUnicode_DecodeUnicodeInternal(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /**
      Params:
      string = Latin-1 encoded string
      length = size of string
      errors = error handling
      */
    PyObject* PyUnicode_DecodeLatin1(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsLatin1String(PyObject* unicode);

    /**
      Params:
      data = Unicode char buffer
      length = Number of Py_UNICODE chars to encode
      errors = error handling
      */
    PyObject* PyUnicode_EncodeLatin1(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);

    /**
      Params:
      string = ASCII encoded string
      length = size of string
      errors = error handling
      */
    PyObject* PyUnicode_DecodeASCII(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);

    /// _
    PyObject* PyUnicode_AsASCIIString(PyObject* unicode);

    /**
      Params:
      data = Unicode char buffer
      length = Number of Py_UNICODE chars to encode
      errors = error handling
      */
    PyObject* PyUnicode_EncodeASCII(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);

    /**
      Params:
      string = Encoded string
      length = size of string
      mapping = character mapping (char ordinal -> unicode ordinal)
      errors = error handling
      */
    PyObject* PyUnicode_DecodeCharmap(
            const(char)* string,
            Py_ssize_t length,
            PyObject* mapping,
            const(char)* errors);

    /**
      Params:
      unicode = Unicode object
      mapping = character mapping (unicode ordinal -> char ordinal)
      */
    PyObject* PyUnicode_AsCharmapString(
            PyObject* unicode,
            PyObject* mapping);

    /**
      Params:
      data = Unicode char buffer
      length = Number of Py_UNICODE chars to encode
      mapping = character mapping (unicode ordinal -> char ordinal)
      errors = error handling
      */
    PyObject* PyUnicode_EncodeCharmap(
            Py_UNICODE* data,
            Py_ssize_t length,
            PyObject* mapping,
            const(char)* errors);

    /** Translate a Py_UNICODE buffer of the given length by applying a
      character mapping table to it and return the resulting Unicode
      object.

      The mapping table must map Unicode ordinal integers to Unicode
      ordinal integers or None (causing deletion of the character).

      Mapping tables may be dictionaries or sequences. Unmapped character
      ordinals (ones which cause a LookupError) are left untouched and
      are copied as-is.
*/
    PyObject* PyUnicode_TranslateCharmap(
            Py_UNICODE* data,
            Py_ssize_t length,
            PyObject* table,
            const(char)* errors);

    version (Windows) {
        /// Availability: Windows only
        PyObject* PyUnicode_DecodeMBCS(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors);

        /// Availability: Windows only
        PyObject* PyUnicode_AsMBCSString(PyObject* unicode);

        /// Availability: Windows only
        PyObject* PyUnicode_EncodeMBCS(
                Py_UNICODE* data,
                Py_ssize_t length,
                const(char)* errors);

    }

    /** Takes a Unicode string holding a decimal value and writes it into
      an output buffer using standard ASCII digit codes.

      The output buffer has to provide at least length+1 bytes of storage
      area. The output string is 0-terminated.

      The encoder converts whitespace to ' ', decimal characters to their
      corresponding ASCII digit and all other Latin-1 characters except
      \0 as-is. Characters outside this range (Unicode ordinals 1-256)
      are treated as errors. This includes embedded NULL bytes.

      Error handling is defined by the errors argument:

      NULL or "strict": raise a ValueError
      "ignore": ignore the wrong characters (these are not copied to the
      output buffer)
      "replace": replaces illegal characters with '?'

      Returns 0 on success, -1 on failure.
      */
    int PyUnicode_EncodeDecimal(
            Py_UNICODE* s,
            Py_ssize_t length,
            char* output,
            const(char)* errors);

    /** Concat two strings giving a new Unicode string.
*/
    PyObject* PyUnicode_Concat(
            PyObject* left,
            PyObject* right);

    version(Python_3_0_Or_Later) {
        /** Concat two strings and put the result in *pleft
          (sets *pleft to NULL on error)
          Params:
          pleft = Pointer to left string
          right = Right string
          */
        /// Availability: 3.*
        void PyUnicode_Append(
                PyObject** pleft,
                PyObject* right);

        /** Concat two strings, put the result in *pleft and drop the right object
          (sets *pleft to NULL on error)
          Params:
          pleft = Pointer to left string
          right = Right string
          */
        /// Availability: 3.*
        void PyUnicode_AppendAndDel(
                PyObject** pleft,
                PyObject* right);

    }

    /** Split a string giving a list of Unicode strings.

      If sep is NULL, splitting will be done at all whitespace
      substrings. Otherwise, splits occur at the given separator.

      At most maxsplit splits will be done. If negative, no limit is set.

      Separators are not included in the resulting list.
      */
    PyObject* PyUnicode_Split(
            PyObject* s,
            PyObject* sep,
            Py_ssize_t maxsplit);

    /** Like PyUnicode_Split, but split at line breaks.

      CRLF is considered to be one line break. Line breaks are not
      included in the resulting list.

      NOTE(review): presumably keepends controls whether line breaks are
      retained -- confirm against the CPython documentation. */
    PyObject* PyUnicode_Splitlines(
            PyObject* s,
            int keepends);

    version(Python_2_5_Or_Later) {
        /** Partition a string using a given separator. */
        /// Availability: >= 2.5
        PyObject* PyUnicode_Partition(
                PyObject* s,
                PyObject* sep);

        /** Partition a string using a given separator, searching from the end
          of the string. */
        /// Availability: >= 2.5
        PyObject* PyUnicode_RPartition(
                PyObject* s,
                PyObject* sep);

    }

    /** Split a string giving a list of Unicode strings.

      If sep is NULL, splitting will be done at all whitespace
      substrings.
Otherwise, splits occur at the given separator.

      At most maxsplit splits will be done. But unlike PyUnicode_Split
      PyUnicode_RSplit splits from the end of the string. If negative,
      no limit is set.

      Separators are not included in the resulting list.
      */
    PyObject* PyUnicode_RSplit(
            PyObject* s,
            PyObject* sep,
            Py_ssize_t maxsplit);

    /** Translate a string by applying a character mapping table to it and
      return the resulting Unicode object.

      The mapping table must map Unicode ordinal integers to Unicode
      ordinal integers or None (causing deletion of the character).

      Mapping tables may be dictionaries or sequences. Unmapped character
      ordinals (ones which cause a LookupError) are left untouched and
      are copied as-is.
      */
    PyObject* PyUnicode_Translate(
            PyObject* str,
            PyObject* table,
            const(char)* errors);

    /** Join a sequence of strings using the given separator and return
      the resulting Unicode string. */
    PyObject* PyUnicode_Join(
            PyObject* separator,
            PyObject* seq);

    /** Return 1 if substr matches str[start:end] at the given tail end, 0
      otherwise. */
    Py_ssize_t PyUnicode_Tailmatch(
            PyObject* str,
            PyObject* substr,
            Py_ssize_t start,
            Py_ssize_t end,
            int direction);

    /** Return the first position of substr in str[start:end] using the
      given search direction or -1 if not found. -2 is returned in case
      an error occurred and an exception is set. */
    Py_ssize_t PyUnicode_Find(
            PyObject* str,
            PyObject* substr,
            Py_ssize_t start,
            Py_ssize_t end,
            int direction);

    /** Count the number of occurrences of substr in str[start:end].
*/
    Py_ssize_t PyUnicode_Count(
            PyObject* str,
            PyObject* substr,
            Py_ssize_t start,
            Py_ssize_t end);

    /** Replace at most maxcount occurrences of substr in str with replstr
      and return the resulting Unicode object. */
    PyObject* PyUnicode_Replace(
            PyObject* str,
            PyObject* substr,
            PyObject* replstr,
            Py_ssize_t maxcount);

    /** Compare two strings and return -1, 0, 1 for less than, equal,
      greater than resp. */
    int PyUnicode_Compare(
            PyObject* left,
            PyObject* right);

    version(Python_3_0_Or_Later) {
        /** Compare two strings and return -1, 0, 1 for less than, equal,
          greater than resp.
          Params:
          left = Unicode object
          right = ASCII-encoded string
          */
        /// Availability: 3.*
        int PyUnicode_CompareWithASCIIString(
                PyObject* left,
                const(char)* right);
    }

    version(Python_2_5_Or_Later) {
        /** Rich compare two strings and return one of the following:

          - NULL in case an exception was raised
          - Py_True or Py_False for successful comparisons
          - Py_NotImplemented in case the type combination is unknown

          Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
          case the conversion of the arguments to Unicode fails with a
          UnicodeDecodeError.

          Possible values for op:

          Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
          */
        /// Availability: >= 2.5
        PyObject* PyUnicode_RichCompare(
                PyObject* left,
                PyObject* right,
                int op);
    }

    /** Apply an argument tuple or dictionary to a format string and return
      the resulting Unicode string. */
    PyObject* PyUnicode_Format(
            PyObject* format,
            PyObject* args);

    /** Checks whether element is contained in container and return 1/0
      accordingly.

      element has to coerce to a one element Unicode string. -1 is
      returned in case of an error.
*/
    int PyUnicode_Contains(
            PyObject* container,
            PyObject* element);

    version(Python_3_0_Or_Later) {
        /** Checks whether argument is a valid identifier. */
        /// Availability: 3.*
        int PyUnicode_IsIdentifier(PyObject* s);
    }

    // Per-character helpers; each takes a single Py_UNICODE code unit.

    /// _
    int _PyUnicode_IsLowercase(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsUppercase(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsTitlecase(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsWhitespace(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsLinebreak(Py_UNICODE ch);

    /// _
    Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch);

    /// _
    Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch);

    /// _
    Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch);

    /// _
    int _PyUnicode_ToDecimalDigit(Py_UNICODE ch);

    /// _
    int _PyUnicode_ToDigit(Py_UNICODE ch);

    /// _
    double _PyUnicode_ToNumeric(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsDecimalDigit(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsDigit(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsNumeric(Py_UNICODE ch);

    /// _
    int _PyUnicode_IsAlpha(Py_UNICODE ch);

}else version(Python_Unicode_UCS2) {

    version(Python_2_6_Or_Later) {

        /** Create a Unicode Object from the Py_UNICODE buffer u of the given
          size.

          u may be NULL which causes the contents to be undefined. It is the
          user's responsibility to fill in the needed data afterwards. Note
          that modifying the Unicode object contents after construction is
          only allowed if u was set to NULL.

          The buffer is copied into the new object.
*/
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_FromUnicode(
                Py_UNICODE* u,
                Py_ssize_t size);
        /// ditto
        alias PyUnicodeUCS2_FromUnicode PyUnicode_FromUnicode;

        /** Counterpart of PyUnicode_FromUnicode() for a char buffer: u points
          to Latin-1 encoded bytes.

          NOTE(review): the CPython C-API documentation describes this buffer
          as UTF-8 encoded rather than Latin-1 -- confirm and correct.
          Params:
          u = char buffer
          size = size of buffer
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_FromStringAndSize(
                const(char)* u,
                Py_ssize_t size);
        /// ditto
        alias PyUnicodeUCS2_FromStringAndSize PyUnicode_FromStringAndSize;

        /** Counterpart of PyUnicode_FromUnicode() for a null-terminated char
          buffer: u points to null-terminated Latin-1 encoded bytes.

          NOTE(review): the CPython C-API documentation describes this buffer
          as UTF-8 encoded rather than Latin-1 -- confirm and correct.
          Params:
          u = string
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_FromString(const(char)* u);
        /// ditto
        alias PyUnicodeUCS2_FromString PyUnicode_FromString;

        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_FromFormatV(const(char)*, va_list);
        /// ditto
        alias PyUnicodeUCS2_FromFormatV PyUnicode_FromFormatV;

        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_FromFormat(const(char)*, ...);
        /// ditto
        alias PyUnicodeUCS2_FromFormat PyUnicode_FromFormat;

        /** Format the object based on the format_spec, as defined in PEP 3101
          (Advanced String Formatting).
*/
        /// Availability: >= 2.6
        PyObject* _PyUnicodeUCS2_FormatAdvanced(
                PyObject* obj,
                Py_UNICODE* format_spec,
                Py_ssize_t format_spec_len);
        /// ditto
        alias _PyUnicodeUCS2_FormatAdvanced _PyUnicode_FormatAdvanced;

        /// Availability: >= 2.6
        int PyUnicodeUCS2_ClearFreeList();
        /// ditto
        alias PyUnicodeUCS2_ClearFreeList PyUnicode_ClearFreeList;

        /**
          Params:
          string = UTF-7 encoded string
          length = size of string
          errors = error handling
          consumed = bytes consumed
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_DecodeUTF7Stateful(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                Py_ssize_t* consumed);
        /// ditto
        alias PyUnicodeUCS2_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful;

        /**
          Params:
          string = UTF-32 encoded string
          length = size of string
          errors = error handling
          byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_DecodeUTF32(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                int* byteorder);
        /// ditto
        alias PyUnicodeUCS2_DecodeUTF32 PyUnicode_DecodeUTF32;

        /**
          Params:
          string = UTF-32 encoded string
          length = size of string
          errors = error handling
          byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
          consumed = bytes consumed
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_DecodeUTF32Stateful(
                const(char)* string,
                Py_ssize_t length,
                const(char)* errors,
                int* byteorder,
                Py_ssize_t* consumed);
        /// ditto
        alias PyUnicodeUCS2_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful;

        /** Returns a Python string using the UTF-32 encoding in native byte
          order. The string always starts with a BOM mark.
*/
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_AsUTF32String(PyObject* unicode);
        /// ditto
        alias PyUnicodeUCS2_AsUTF32String PyUnicode_AsUTF32String;

        /** Returns a Python string object holding the UTF-32 encoded value of
          the Unicode data.

          The output byte order is selected by byteorder:

          byteorder == -1: little endian
          byteorder ==  0: native byte order (writes a BOM mark)
          byteorder ==  1: big endian

          If byteorder is 0, the output string will always start with the
          Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
          prepended.
          Params:
          data = Unicode char buffer
          length = number of Py_UNICODE chars to encode
          errors = error handling
          byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
          */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS2_EncodeUTF32(
                const Py_UNICODE* data,
                Py_ssize_t length,
                const(char)* errors,
                int byteorder);
        /// ditto
        alias PyUnicodeUCS2_EncodeUTF32 PyUnicode_EncodeUTF32;

    }

    /** Return a read-only pointer to the Unicode object's internal
      Py_UNICODE buffer. */
    Py_UNICODE* PyUnicodeUCS2_AsUnicode(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsUnicode PyUnicode_AsUnicode;

    /** Get the length of the Unicode object. */
    Py_ssize_t PyUnicodeUCS2_GetSize(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_GetSize PyUnicode_GetSize;

    /** Get the maximum ordinal for a Unicode character. */
    Py_UNICODE PyUnicodeUCS2_GetMax();
    /// ditto
    alias PyUnicodeUCS2_GetMax PyUnicode_GetMax;

    /** Resize an already allocated Unicode object to the new size length.

      _*unicode is modified to point to the new (resized) object and 0
      returned on success.
This API may only be called by the function which also called the
    Unicode constructor. The refcount on the object must be 1. Otherwise,
    an error is returned.

    Error handling is implemented as follows: an exception is set, -1
    is returned and *unicode left untouched.

    Params:
        unicode = pointer to the new unicode object.
        length = New length.
    */
    int PyUnicodeUCS2_Resize(
        PyObject** unicode,
        Py_ssize_t length);
    /// ditto
    alias PyUnicodeUCS2_Resize PyUnicode_Resize;

    /** Coerce obj to an Unicode object and return a reference with
    _*incremented* refcount.

    The coercion rules are:

    1. String and other char buffer compatible objects are decoded
       under the assumptions that they contain data using the current
       default encoding. Decoding is done in "strict" mode.

    2. All other objects (including Unicode objects) raise an
       exception.

    NULL is returned in case of an error. The caller is responsible
    for decref'ing the returned objects.
    */
    PyObject* PyUnicodeUCS2_FromEncodedObject(
        PyObject* obj, const(char)* encoding, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_FromEncodedObject PyUnicode_FromEncodedObject;

    /** Coerce obj to an Unicode object and return a reference with
    _*incremented* refcount.

    Unicode objects are passed back as-is (subclasses are converted to
    true Unicode objects), all other objects are delegated to
    PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
    using the default encoding as basis for decoding the object.

    The API returns NULL in case of an error. The caller is responsible
    for decref'ing the returned objects.
*/
    PyObject* PyUnicodeUCS2_FromObject(PyObject* obj);
    /// ditto
    alias PyUnicodeUCS2_FromObject PyUnicode_FromObject;

    /** Create a Unicode Object from the wchar_t buffer w of the given
    size.

    The buffer is copied into the new object. */
    PyObject* PyUnicodeUCS2_FromWideChar(const(wchar_t)* w, Py_ssize_t size);
    /// ditto
    alias PyUnicodeUCS2_FromWideChar PyUnicode_FromWideChar;

    /** Copies the Unicode Object contents into the wchar_t buffer w. At
    most size wchar_t characters are copied.

    Note that the resulting wchar_t string may or may not be
    0-terminated. It is the responsibility of the caller to make sure
    that the wchar_t string is 0-terminated in case this is required by
    the application.

    Params:
        unicode = Unicode object whose contents are copied
        w = destination buffer; it is written to, so it must be mutable
        size = maximum number of wchar_t characters to copy

    Returns: the number of wchar_t characters copied (excluding a
    possibly trailing 0-termination character) or -1 in case of an
    error. */
    Py_ssize_t PyUnicodeUCS2_AsWideChar(
        PyUnicodeObject* unicode,
        // Output parameter: declared mutable because the callee fills it.
        wchar_t* w,
        Py_ssize_t size);
    /// ditto
    alias PyUnicodeUCS2_AsWideChar PyUnicode_AsWideChar;

    /** Create a Unicode Object from the given Unicode code point ordinal.

    The ordinal must be in range(0x10000) on narrow Python builds
    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
    raised in case it is not.
    */
    PyObject* PyUnicodeUCS2_FromOrdinal(int ordinal);
    /// ditto
    alias PyUnicodeUCS2_FromOrdinal PyUnicode_FromOrdinal;

    /** Return a Python string holding the default encoded value of the
    Unicode object.

    The resulting string is cached in the Unicode object for subsequent
    usage by this function. The cached version is needed to implement
    the character buffer interface and will live (at least) as long as
    the Unicode object itself.

    The refcount of the string is *not* incremented.
_*** Exported for internal use by the interpreter only !!! ***

    */
    PyObject* _PyUnicodeUCS2_AsDefaultEncodedString(PyObject*, const(char)*);
    /// ditto
    alias _PyUnicodeUCS2_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString;

    /** Returns the currently active default encoding.

    The default encoding is currently implemented as run-time settable
    process global. Future versions of the interpreter may change this
    so that it becomes a parameter managed on a per-thread basis.
    */
    const(char)* PyUnicodeUCS2_GetDefaultEncoding();
    /// ditto
    alias PyUnicodeUCS2_GetDefaultEncoding PyUnicode_GetDefaultEncoding;

    /** Sets the currently active default encoding.

    Returns: 0 on success, -1 in case of an error.
    */
    int PyUnicodeUCS2_SetDefaultEncoding(const(char)* encoding);
    /// ditto
    alias PyUnicodeUCS2_SetDefaultEncoding PyUnicode_SetDefaultEncoding;

    /** Create a Unicode object by decoding the encoded string s of the
    given size.

    Params:
        s = encoded string
        size = size of buffer
        encoding = encoding
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_Decode(
        const(char)* s, Py_ssize_t size,
        const(char)* encoding, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_Decode PyUnicode_Decode;

    version(Python_3_0_Or_Later) {
        /** Decode a Unicode object unicode and return the result as Python
        object. */
        /// Availability: 3.*
        PyObject* PyUnicodeUCS2_AsDecodedObject(
            PyObject* unicode, const(char)* encoding, const(char)* errors);
        /// ditto
        alias PyUnicodeUCS2_AsDecodedObject PyUnicode_AsDecodedObject;

        /** Decode a Unicode object unicode and return the result as Unicode
        object.
*/
        /// Availability: 3.*
        PyObject* PyUnicodeUCS2_AsDecodedUnicode(
            PyObject* unicode, const(char)* encoding, const(char)* errors);
        /// ditto
        alias PyUnicodeUCS2_AsDecodedUnicode PyUnicode_AsDecodedUnicode;

    }

    /** Encodes a Py_UNICODE buffer of the given size and returns a
    Python string object.

    Params:
        s = Unicode char buffer
        size = number of Py_UNICODE chars to encode
        encoding = encoding
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_Encode(
        Py_UNICODE* s, Py_ssize_t size,
        const(char)* encoding, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_Encode PyUnicode_Encode;

    /** Encodes a Unicode object and returns the result as Python object. */
    PyObject* PyUnicodeUCS2_AsEncodedObject(
        PyObject* unicode, const(char)* encoding, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_AsEncodedObject PyUnicode_AsEncodedObject;

    /** Encodes a Unicode object and returns the result as Python string
    object. */
    PyObject* PyUnicodeUCS2_AsEncodedString(
        PyObject* unicode, const(char)* encoding, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_AsEncodedString PyUnicode_AsEncodedString;

    version(Python_3_0_Or_Later) {
        /** Encodes a Unicode object and returns the result as Unicode
        object.
*/
        /// Availability: 3.*
        PyObject* PyUnicodeUCS2_AsEncodedUnicode(
            PyObject* unicode, const(char)* encoding, const(char)* errors);
        /// ditto
        alias PyUnicodeUCS2_AsEncodedUnicode PyUnicode_AsEncodedUnicode;

    }

    /**
    Params:
        string = UTF-7 encoded string
        length = size of string
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_DecodeUTF7(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeUTF7 PyUnicode_DecodeUTF7;

    /**
    Params:
        data = Unicode char buffer
        length = number of Py_UNICODE chars to encode
        encodeSetO = Encode RFC2152 Set O characters in base64
        encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_EncodeUTF7(
        Py_UNICODE* data,
        Py_ssize_t length,
        int encodeSetO,
        int encodeWhiteSpace,
        const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeUTF7 PyUnicode_EncodeUTF7;

    /// _
    PyObject* PyUnicodeUCS2_DecodeUTF8(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeUTF8 PyUnicode_DecodeUTF8;

    /// _
    PyObject* PyUnicodeUCS2_DecodeUTF8Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        Py_ssize_t* consumed);
    /// ditto
    alias PyUnicodeUCS2_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful;

    /// _
    PyObject* PyUnicodeUCS2_AsUTF8String(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsUTF8String PyUnicode_AsUTF8String;

    /// _
    PyObject* PyUnicodeUCS2_EncodeUTF8(
        Py_UNICODE* data, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeUTF8 PyUnicode_EncodeUTF8;

    /**
Decodes length bytes from a UTF-16 encoded buffer string and returns
    the corresponding Unicode object.

    errors (if non-NULL) defines the error handling. It defaults
    to "strict".

    A non-NULL byteorder makes the decoder start with the given byte
    order:

        *byteorder == -1: little endian
        *byteorder == 0:  native order
        *byteorder == 1:  big endian

    In native mode, the first two bytes of the stream are checked for a
    BOM mark. If found, the BOM mark is analysed, the byte order
    adjusted and the BOM skipped. In the other modes, no BOM mark
    interpretation is done. After completion, *byteorder is set to the
    current byte order at the end of input data.

    If byteorder is NULL, the codec starts in native order mode.
    */
    PyObject* PyUnicodeUCS2_DecodeUTF16(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder);
    /// ditto
    alias PyUnicodeUCS2_DecodeUTF16 PyUnicode_DecodeUTF16;

    /**
    Params:
        string = UTF-16 encoded string
        length = size of string
        errors = error handling
        byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
        consumed = bytes consumed
    */
    PyObject* PyUnicodeUCS2_DecodeUTF16Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder,
        Py_ssize_t* consumed);
    /// ditto
    alias PyUnicodeUCS2_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful;

    /** Returns a Python string using the UTF-16 encoding in native byte
    order. The string always starts with a BOM mark. */
    PyObject* PyUnicodeUCS2_AsUTF16String(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsUTF16String PyUnicode_AsUTF16String;

    /** Returns a Python string object holding the UTF-16 encoded value of
    the Unicode data.
If byteorder is not 0, output is written according to the following
    byte order:

        byteorder == -1: little endian
        byteorder == 0:  native byte order (writes a BOM mark)
        byteorder == 1:  big endian

    If byteorder is 0, the output string will always start with the
    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
    prepended.

    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
    UCS-2. This trick makes it possible to add full UTF-16 capabilities
    at a later point without compromising the APIs.
    */
    PyObject* PyUnicodeUCS2_EncodeUTF16(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors,
        int byteorder);
    /// ditto
    alias PyUnicodeUCS2_EncodeUTF16 PyUnicode_EncodeUTF16;

    /// _
    PyObject* PyUnicodeUCS2_DecodeUnicodeEscape(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape;

    /// _
    PyObject* PyUnicodeUCS2_AsUnicodeEscapeString(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString;

    /// _
    PyObject* PyUnicodeUCS2_EncodeUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);
    /// ditto
    alias PyUnicodeUCS2_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape;

    /**
    Params:
        string = Raw-Unicode-Escape encoded string
        length = size of string
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_DecodeRawUnicodeEscape(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape;

    /// _
    PyObject* PyUnicodeUCS2_AsRawUnicodeEscapeString(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString;

    /// _
    PyObject* PyUnicodeUCS2_EncodeRawUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);
    /// ditto
    alias PyUnicodeUCS2_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape;

    /// _
    PyObject* _PyUnicodeUCS2_DecodeUnicodeInternal(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias _PyUnicodeUCS2_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal;

    /**
    Params:
        string = Latin-1 encoded string
        length = size of string
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_DecodeLatin1(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeLatin1 PyUnicode_DecodeLatin1;

    /// _
    PyObject* PyUnicodeUCS2_AsLatin1String(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsLatin1String PyUnicode_AsLatin1String;

    /**
    Params:
        data = Unicode char buffer
        length = Number of Py_UNICODE chars to encode
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_EncodeLatin1(
        Py_UNICODE* data, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeLatin1 PyUnicode_EncodeLatin1;

    /**
    Params:
        string = ASCII encoded string
        length = size of string
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_DecodeASCII(
        const(char)* string, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeASCII PyUnicode_DecodeASCII;

    /// _
    PyObject* PyUnicodeUCS2_AsASCIIString(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS2_AsASCIIString PyUnicode_AsASCIIString;

    /**
    Params:
    data = Unicode char buffer
    length = Number of Py_UNICODE
chars to encode
    errors = error handling
    */
    PyObject* PyUnicodeUCS2_EncodeASCII(
        Py_UNICODE* data, Py_ssize_t length, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeASCII PyUnicode_EncodeASCII;

    /**
    Params:
        string = Encoded string
        length = size of string
        mapping = character mapping (char ordinal -> unicode ordinal)
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_DecodeCharmap(
        const(char)* string,
        Py_ssize_t length,
        PyObject* mapping,
        const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_DecodeCharmap PyUnicode_DecodeCharmap;

    /**
    Params:
        unicode = Unicode object
        mapping = character mapping (unicode ordinal -> char ordinal)
    */
    PyObject* PyUnicodeUCS2_AsCharmapString(PyObject* unicode, PyObject* mapping);
    /// ditto
    alias PyUnicodeUCS2_AsCharmapString PyUnicode_AsCharmapString;

    /**
    Params:
        data = Unicode char buffer
        length = Number of Py_UNICODE chars to encode
        mapping = character mapping (unicode ordinal -> char ordinal)
        errors = error handling
    */
    PyObject* PyUnicodeUCS2_EncodeCharmap(
        Py_UNICODE* data,
        Py_ssize_t length,
        PyObject* mapping,
        const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeCharmap PyUnicode_EncodeCharmap;

    /** Translate a Py_UNICODE buffer of the given length by applying a
    character mapping table to it and return the resulting Unicode
    object.

    The mapping table must map Unicode ordinal integers to Unicode
    ordinal integers or None (causing deletion of the character).

    Mapping tables may be dictionaries or sequences. Unmapped character
    ordinals (ones which cause a LookupError) are left untouched and
    are copied as-is.
*/
    PyObject* PyUnicodeUCS2_TranslateCharmap(
        Py_UNICODE* data,
        Py_ssize_t length,
        PyObject* table,
        const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_TranslateCharmap PyUnicode_TranslateCharmap;

    version (Windows) {
        /// Availability: Windows only
        PyObject* PyUnicodeUCS2_DecodeMBCS(
            const(char)* string, Py_ssize_t length, const(char)* errors);
        /// ditto
        alias PyUnicodeUCS2_DecodeMBCS PyUnicode_DecodeMBCS;

        /// Availability: Windows only
        PyObject* PyUnicodeUCS2_AsMBCSString(PyObject* unicode);
        /// ditto
        alias PyUnicodeUCS2_AsMBCSString PyUnicode_AsMBCSString;

        /// Availability: Windows only
        PyObject* PyUnicodeUCS2_EncodeMBCS(
            Py_UNICODE* data, Py_ssize_t length, const(char)* errors);
        /// ditto
        alias PyUnicodeUCS2_EncodeMBCS PyUnicode_EncodeMBCS;

    }

    /** Takes a Unicode string holding a decimal value and writes it into
    an output buffer using standard ASCII digit codes.

    The output buffer has to provide at least length+1 bytes of storage
    area. The output string is 0-terminated.

    The encoder converts whitespace to ' ', decimal characters to their
    corresponding ASCII digit and all other Latin-1 characters except
    \0 as-is. Characters outside this range (Unicode ordinals 1-256)
    are treated as errors. This includes embedded NULL bytes.

    Error handling is defined by the errors argument:

        NULL or "strict": raise a ValueError
        "ignore": ignore the wrong characters (these are not copied to the
                  output buffer)
        "replace": replaces illegal characters with '?'

    Returns 0 on success, -1 on failure.
*/
    int PyUnicodeUCS2_EncodeDecimal(
        Py_UNICODE* s,
        Py_ssize_t length,
        char* output,
        const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_EncodeDecimal PyUnicode_EncodeDecimal;

    /** Concat two strings giving a new Unicode string. */
    PyObject* PyUnicodeUCS2_Concat(PyObject* left, PyObject* right);
    /// ditto
    alias PyUnicodeUCS2_Concat PyUnicode_Concat;

    version(Python_3_0_Or_Later) {
        /** Concat two strings and put the result in *pleft
        (sets *pleft to NULL on error)

        Params:
            pleft = Pointer to left string
            right = Right string
        */
        /// Availability: 3.*
        void PyUnicodeUCS2_Append(PyObject** pleft, PyObject* right);
        /// ditto
        alias PyUnicodeUCS2_Append PyUnicode_Append;

        /** Concat two strings, put the result in *pleft and drop the right object
        (sets *pleft to NULL on error)

        Params:
            pleft = Pointer to left string
            right = Right string (the object that is dropped)
        */
        /// Availability: 3.*
        void PyUnicodeUCS2_AppendAndDel(PyObject** pleft, PyObject* right);
        /// ditto
        alias PyUnicodeUCS2_AppendAndDel PyUnicode_AppendAndDel;

    }

    /** Split a string giving a list of Unicode strings.

    A NULL sep splits at all whitespace substrings; otherwise splits
    occur at the given separator.

    At most maxsplit splits will be done. If negative, no limit is set.

    Separators are not included in the resulting list.
    */
    PyObject* PyUnicodeUCS2_Split(
        PyObject* s, PyObject* sep, Py_ssize_t maxsplit);
    /// ditto
    alias PyUnicodeUCS2_Split PyUnicode_Split;

    /** Ditto PyUnicode_Split, but split at line breaks.

    CRLF is considered to be one line break. Line breaks are not
    included in the resulting list.
*/
    PyObject* PyUnicodeUCS2_Splitlines(PyObject* s, int keepends);
    /// ditto
    alias PyUnicodeUCS2_Splitlines PyUnicode_Splitlines;

    version(Python_2_5_Or_Later) {
        /** Partition a string using a given separator. */
        /// Availability: >= 2.5
        PyObject* PyUnicodeUCS2_Partition(PyObject* s, PyObject* sep);
        /// ditto
        alias PyUnicodeUCS2_Partition PyUnicode_Partition;

        /** Partition a string using a given separator, searching from the end
        of the string. */
        /// Availability: >= 2.5
        PyObject* PyUnicodeUCS2_RPartition(PyObject* s, PyObject* sep);
        /// ditto
        alias PyUnicodeUCS2_RPartition PyUnicode_RPartition;

    }

    /** Split a string giving a list of Unicode strings.

    A NULL sep splits at all whitespace substrings; otherwise splits
    occur at the given separator.

    At most maxsplit splits will be done. But unlike PyUnicode_Split
    PyUnicode_RSplit splits from the end of the string. If negative,
    no limit is set.

    Separators are not included in the resulting list.
    */
    PyObject* PyUnicodeUCS2_RSplit(
        PyObject* s, PyObject* sep, Py_ssize_t maxsplit);
    /// ditto
    alias PyUnicodeUCS2_RSplit PyUnicode_RSplit;

    /** Translate a string by applying a character mapping table to it and
    return the resulting Unicode object.

    The mapping table must map Unicode ordinal integers to Unicode
    ordinal integers or None (causing deletion of the character).

    Mapping tables may be dictionaries or sequences. Unmapped character
    ordinals (ones which cause a LookupError) are left untouched and
    are copied as-is.
*/
    PyObject* PyUnicodeUCS2_Translate(
        PyObject* str, PyObject* table, const(char)* errors);
    /// ditto
    alias PyUnicodeUCS2_Translate PyUnicode_Translate;

    /** Join a sequence of strings using the given separator and return
    the resulting Unicode string. */
    PyObject* PyUnicodeUCS2_Join(PyObject* separator, PyObject* seq);
    /// ditto
    alias PyUnicodeUCS2_Join PyUnicode_Join;

    /** Return 1 if substr matches str[start:end] at the given tail end, 0
    otherwise. */
    Py_ssize_t PyUnicodeUCS2_Tailmatch(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction);
    /// ditto
    alias PyUnicodeUCS2_Tailmatch PyUnicode_Tailmatch;

    /** Return the first position of substr in str[start:end] using the
    given search direction or -1 if not found. -2 is returned in case
    an error occurred and an exception is set. */
    Py_ssize_t PyUnicodeUCS2_Find(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction);
    /// ditto
    alias PyUnicodeUCS2_Find PyUnicode_Find;

    /** Count the number of occurrences of substr in str[start:end]. */
    Py_ssize_t PyUnicodeUCS2_Count(
        PyObject* str, PyObject* substr, Py_ssize_t start, Py_ssize_t end);
    /// ditto
    alias PyUnicodeUCS2_Count PyUnicode_Count;

    /** Replace at most maxcount occurrences of substr in str with replstr
    and return the resulting Unicode object. */
    PyObject* PyUnicodeUCS2_Replace(
        PyObject* str,
        PyObject* substr,
        PyObject* replstr,
        Py_ssize_t maxcount);
    /// ditto
    alias PyUnicodeUCS2_Replace PyUnicode_Replace;

    /** Compare two strings and return -1, 0, 1 for less than, equal,
    greater than resp.
*/
    int PyUnicodeUCS2_Compare(PyObject* left, PyObject* right);
    /// ditto
    alias PyUnicodeUCS2_Compare PyUnicode_Compare;

    version(Python_3_0_Or_Later) {
        /** Compare two strings and return -1, 0, 1 for less than, equal,
        greater than resp.

        Params:
            left = string object to compare
            right = ASCII-encoded string
        */
        /// Availability: 3.*
        int PyUnicodeUCS2_CompareWithASCIIString(PyObject* left, const(char)* right);
        /// ditto
        alias PyUnicodeUCS2_CompareWithASCIIString PyUnicode_CompareWithASCIIString;

    }

    version(Python_2_5_Or_Later) {
        /** Rich compare two strings and return one of the following:

        - NULL in case an exception was raised
        - Py_True or Py_False for successful comparisons
        - Py_NotImplemented in case the type combination is unknown

        Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
        case the conversion of the arguments to Unicode fails with a
        UnicodeDecodeError.

        Possible values for op:

            Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
        */
        /// Availability: >= 2.5
        PyObject* PyUnicodeUCS2_RichCompare(PyObject* left, PyObject* right, int op);
        /// ditto
        alias PyUnicodeUCS2_RichCompare PyUnicode_RichCompare;

    }

    /** Apply an argument tuple or dictionary to a format string and return
    the resulting Unicode string. */
    PyObject* PyUnicodeUCS2_Format(PyObject* format, PyObject* args);
    /// ditto
    alias PyUnicodeUCS2_Format PyUnicode_Format;

    /** Checks whether element is contained in container and return 1/0
    accordingly.

    element has to coerce to an one element Unicode string. -1 is
    returned in case of an error.
*/
    int PyUnicodeUCS2_Contains(PyObject* container, PyObject* element);
    /// ditto
    alias PyUnicodeUCS2_Contains PyUnicode_Contains;

    version(Python_3_0_Or_Later) {
        /** Checks whether argument is a valid identifier. */
        /// Availability: 3.*
        int PyUnicodeUCS2_IsIdentifier(PyObject* s);
        /// ditto
        alias PyUnicodeUCS2_IsIdentifier PyUnicode_IsIdentifier;

    }

    // Per-character helpers: each takes a single Py_UNICODE code unit.

    /// _
    int _PyUnicodeUCS2_IsLowercase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsLowercase _PyUnicode_IsLowercase;

    /// _
    int _PyUnicodeUCS2_IsUppercase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsUppercase _PyUnicode_IsUppercase;

    /// _
    int _PyUnicodeUCS2_IsTitlecase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsTitlecase _PyUnicode_IsTitlecase;

    /// _
    int _PyUnicodeUCS2_IsWhitespace(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsWhitespace _PyUnicode_IsWhitespace;

    /// _
    int _PyUnicodeUCS2_IsLinebreak(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsLinebreak _PyUnicode_IsLinebreak;

    /// _
    Py_UNICODE _PyUnicodeUCS2_ToLowercase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToLowercase _PyUnicode_ToLowercase;

    /// _
    Py_UNICODE _PyUnicodeUCS2_ToUppercase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToUppercase _PyUnicode_ToUppercase;

    /// _
    Py_UNICODE _PyUnicodeUCS2_ToTitlecase(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToTitlecase _PyUnicode_ToTitlecase;

    /// _
    int _PyUnicodeUCS2_ToDecimalDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToDecimalDigit _PyUnicode_ToDecimalDigit;

    /// _
    int _PyUnicodeUCS2_ToDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToDigit _PyUnicode_ToDigit;

    /// _
    double _PyUnicodeUCS2_ToNumeric(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_ToNumeric _PyUnicode_ToNumeric;

    /// _
    int _PyUnicodeUCS2_IsDecimalDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsDecimalDigit _PyUnicode_IsDecimalDigit;

    /// _
    int _PyUnicodeUCS2_IsDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsDigit _PyUnicode_IsDigit;

    /// _
    int _PyUnicodeUCS2_IsNumeric(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsNumeric _PyUnicode_IsNumeric;

    /// _
    int _PyUnicodeUCS2_IsAlpha(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS2_IsAlpha _PyUnicode_IsAlpha;

} else {

    version(Python_2_6_Or_Later) {

        /** Create a Unicode Object from the Py_UNICODE buffer u of the given
        size.

        u may be NULL which causes the contents to be undefined. It is the
        user's responsibility to fill in the needed data afterwards. Note
        that modifying the Unicode object contents after construction is
        only allowed if u was set to NULL.

        The buffer is copied into the new object.
*/
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_FromUnicode(
            Py_UNICODE* u,
            Py_ssize_t size);
        /// ditto
        alias PyUnicodeUCS4_FromUnicode PyUnicode_FromUnicode;

        /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes

        Params:
            u = char buffer
            size = size of buffer
        */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_FromStringAndSize(const(char)* u, Py_ssize_t size);
        /// ditto
        alias PyUnicodeUCS4_FromStringAndSize PyUnicode_FromStringAndSize;

        /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated
        Latin-1 encoded bytes

        Params:
            u = string
        */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_FromString(const(char)* u);
        /// ditto
        alias PyUnicodeUCS4_FromString PyUnicode_FromString;

        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_FromFormatV(const(char)*, va_list);
        /// ditto
        alias PyUnicodeUCS4_FromFormatV PyUnicode_FromFormatV;

        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_FromFormat(const(char)*, ...);
        /// ditto
        alias PyUnicodeUCS4_FromFormat PyUnicode_FromFormat;

        /** Format the object based on the format_spec, as defined in PEP 3101
        (Advanced String Formatting).
*/
        /// Availability: >= 2.6
        PyObject* _PyUnicodeUCS4_FormatAdvanced(
            PyObject* obj,
            Py_UNICODE* format_spec,
            Py_ssize_t format_spec_len);
        /// ditto
        alias _PyUnicodeUCS4_FormatAdvanced _PyUnicode_FormatAdvanced;

        /// Availability: >= 2.6
        int PyUnicodeUCS4_ClearFreeList();
        /// ditto
        alias PyUnicodeUCS4_ClearFreeList PyUnicode_ClearFreeList;

        /**
        Params:
            string = UTF-7 encoded string
            length = size of string
            errors = error handling
            consumed = bytes consumed
        */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_DecodeUTF7Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            Py_ssize_t* consumed);
        /// ditto
        alias PyUnicodeUCS4_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful;

        /**
        Params:
            string = UTF-32 encoded string
            length = size of string
            errors = error handling
            byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
        */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_DecodeUTF32(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder);
        /// ditto
        alias PyUnicodeUCS4_DecodeUTF32 PyUnicode_DecodeUTF32;

        /**
        Params:
            string = UTF-32 encoded string
            length = size of string
            errors = error handling
            byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
            consumed = bytes consumed
        */
        /// Availability: >= 2.6
        PyObject* PyUnicodeUCS4_DecodeUTF32Stateful(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors,
            int* byteorder,
            Py_ssize_t* consumed);
        /// ditto
        alias PyUnicodeUCS4_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful;

        /** Returns a Python string using the UTF-32 encoding in native byte
        order. The string always starts with a BOM mark.
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS4_AsUTF32String(
        PyObject* unicode
        );
/// ditto
alias PyUnicodeUCS4_AsUTF32String PyUnicode_AsUTF32String;


/** Returns a Python string object holding the UTF-32 encoded value of
   the Unicode data.

   If byteorder is not 0, output is written according to the following
   byte order:

   byteorder == -1: little endian
   byteorder == 0:  native byte order (writes a BOM mark)
   byteorder == 1:  big endian

   If byteorder is 0, the output string will always start with the
   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   prepended.
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
errors = error handling
byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE

*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS4_EncodeUTF32(
        const Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors,
        int byteorder
        );
/// ditto
alias PyUnicodeUCS4_EncodeUTF32 PyUnicode_EncodeUTF32;

}

/** Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer. */
Py_UNICODE* PyUnicodeUCS4_AsUnicode(PyObject* unicode);
/// ditto
alias PyUnicodeUCS4_AsUnicode PyUnicode_AsUnicode;

/** Get the length of the Unicode object. */
Py_ssize_t PyUnicodeUCS4_GetSize(PyObject* unicode);
/// ditto
alias PyUnicodeUCS4_GetSize PyUnicode_GetSize;


/** Get the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicodeUCS4_GetMax();
/// ditto
alias PyUnicodeUCS4_GetMax PyUnicode_GetMax;


/** Resize an already allocated Unicode object to the new size length.

   _*unicode is modified to point to the new (resized) object and 0
   returned on success.
3530 3531 This API may only be called by the function which also called the 3532 Unicode constructor. The refcount on the object must be 1. Otherwise, 3533 an error is returned. 3534 3535 Error handling is implemented as follows: an exception is set, -1 3536 is returned and *unicode left untouched. 3537 Params: 3538 unicode = pointer to the new unicode object. 3539 length = New length. 3540 3541 */ 3542 int PyUnicodeUCS4_Resize(PyObject** unicode, Py_ssize_t length); 3543 /// ditto 3544 3545 alias PyUnicodeUCS4_Resize PyUnicode_Resize; 3546 3547 /** Coerce obj to an Unicode object and return a reference with 3548 _*incremented* refcount. 3549 3550 Coercion is done in the following way: 3551 3552 1. String and other char buffer compatible objects are decoded 3553 under the assumptions that they contain data using the current 3554 default encoding. Decoding is done in "strict" mode. 3555 3556 2. All other objects (including Unicode objects) raise an 3557 exception. 3558 3559 The API returns NULL in case of an error. The caller is responsible 3560 for decref'ing the returned objects. 3561 3562 */ 3563 PyObject* PyUnicodeUCS4_FromEncodedObject( 3564 PyObject* obj, 3565 const(char)* encoding, 3566 const(char)* errors); 3567 /// ditto 3568 3569 alias PyUnicodeUCS4_FromEncodedObject PyUnicode_FromEncodedObject; 3570 3571 3572 /** Coerce obj to an Unicode object and return a reference with 3573 _*incremented* refcount. 3574 3575 Unicode objects are passed back as-is (subclasses are converted to 3576 true Unicode objects), all other objects are delegated to 3577 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 3578 using the default encoding as basis for decoding the object. 3579 3580 The API returns NULL in case of an error. The caller is responsible 3581 for decref'ing the returned objects. 
*/
PyObject* PyUnicodeUCS4_FromObject(PyObject* obj);
/// ditto
alias PyUnicodeUCS4_FromObject PyUnicode_FromObject;


/** Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object. */
PyObject* PyUnicodeUCS4_FromWideChar(const(wchar_t)* w, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS4_FromWideChar PyUnicode_FromWideChar;


/** Copies the Unicode Object contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Note that the resulting wchar_t string may or may not be
   0-terminated. It is the responsibility of the caller to make sure
   that the wchar_t string is 0-terminated in case this is required by
   the application.

   Returns the number of wchar_t characters copied (excluding a
   possibly trailing 0-termination character) or -1 in case of an
   error.

   NOTE(review): w is described above as the destination buffer, yet it
   is declared const(wchar_t)* here. The declaration is left unchanged
   for source compatibility; confirm against the C prototype before
   relying on the const qualifier. */
Py_ssize_t PyUnicodeUCS4_AsWideChar(
        PyUnicodeObject* unicode,
        const(wchar_t)* w,
        Py_ssize_t size);
/// ditto
alias PyUnicodeUCS4_AsWideChar PyUnicode_AsWideChar;


/** Create a Unicode Object from the given Unicode code point ordinal.

   The ordinal must be in range(0x10000) on narrow Python builds
   (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
   raised in case it is not.

*/
PyObject* PyUnicodeUCS4_FromOrdinal(int ordinal);
/// ditto
alias PyUnicodeUCS4_FromOrdinal PyUnicode_FromOrdinal;


/** Return a Python string holding the default encoded value of the
   Unicode object.

   The resulting string is cached in the Unicode object for subsequent
   usage by this function. The cached version is needed to implement
   the character buffer interface and will live (at least) as long as
   the Unicode object itself.

   The refcount of the string is *not* incremented.
3642 3643 _*** Exported for internal use by the interpreter only !!! *** 3644 3645 */ 3646 PyObject* _PyUnicodeUCS4_AsDefaultEncodedString(PyObject *, const(char)*); 3647 /// ditto 3648 3649 alias _PyUnicodeUCS4_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 3650 3651 3652 /** Returns the currently active default encoding. 3653 3654 The default encoding is currently implemented as run-time settable 3655 process global. This may change in future versions of the 3656 interpreter to become a parameter which is managed on a per-thread 3657 basis. 3658 3659 */ 3660 const(char)* PyUnicodeUCS4_GetDefaultEncoding(); 3661 /// ditto 3662 3663 alias PyUnicodeUCS4_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 3664 3665 3666 /** Sets the currently active default encoding. 3667 3668 Returns 0 on success, -1 in case of an error. 3669 3670 */ 3671 int PyUnicodeUCS4_SetDefaultEncoding(const(char)*encoding); 3672 /// ditto 3673 3674 alias PyUnicodeUCS4_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 3675 3676 3677 /** Create a Unicode object by decoding the encoded string s of the 3678 given size. 3679 Params: 3680 s = encoded string 3681 size = size of buffer 3682 encoding = encoding 3683 errors = error handling 3684 */ 3685 PyObject* PyUnicodeUCS4_Decode( 3686 const(char)* s, 3687 Py_ssize_t size, 3688 const(char)* encoding, 3689 const(char)* errors); 3690 /// ditto 3691 3692 alias PyUnicodeUCS4_Decode PyUnicode_Decode; 3693 3694 3695 version(Python_3_0_Or_Later) { 3696 /** Decode a Unicode object unicode and return the result as Python 3697 object. */ 3698 /// Availability: 3.* 3699 3700 PyObject* PyUnicodeUCS4_AsDecodedObject( 3701 PyObject* unicode, 3702 const(char)* encoding, 3703 const(char)* errors 3704 ); 3705 /// ditto 3706 3707 alias PyUnicodeUCS4_AsDecodedObject PyUnicode_AsDecodedObject; 3708 3709 /** Decode a Unicode object unicode and return the result as Unicode 3710 object. 
*/
    /// Availability: 3.*
    PyObject* PyUnicodeUCS4_AsDecodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    /// ditto
    alias PyUnicodeUCS4_AsDecodedUnicode PyUnicode_AsDecodedUnicode;

}

/** Encodes a Py_UNICODE buffer of the given size and returns a
   Python string object.
Params:
s = Unicode char buffer
size = number of Py_UNICODE chars to encode
encoding = encoding
errors = error handling
*/
PyObject* PyUnicodeUCS4_Encode(
        Py_UNICODE* s,
        Py_ssize_t size,
        const(char)* encoding,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_Encode PyUnicode_Encode;


/** Encodes a Unicode object and returns the result as Python object.
 */
PyObject* PyUnicodeUCS4_AsEncodedObject(
        PyObject* unicode,
        const(char)* encoding,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_AsEncodedObject PyUnicode_AsEncodedObject;


/** Encodes a Unicode object and returns the result as Python string
   object. */
PyObject* PyUnicodeUCS4_AsEncodedString(
        PyObject* unicode,
        const(char)* encoding,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_AsEncodedString PyUnicode_AsEncodedString;


version(Python_3_0_Or_Later) {
    /** Encodes a Unicode object and returns the result as Unicode
       object.
*/
    /// Availability: 3.*
    PyObject* PyUnicodeUCS4_AsEncodedUnicode(
            PyObject* unicode,
            const(char)* encoding,
            const(char)* errors
            );
    /// ditto
    alias PyUnicodeUCS4_AsEncodedUnicode PyUnicode_AsEncodedUnicode;

}

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicodeUCS4_DecodeUTF7(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_DecodeUTF7 PyUnicode_DecodeUTF7;


/**
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
encodeSetO = Encode RFC2152 Set O characters in base64
encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
errors = error handling
*/
PyObject* PyUnicodeUCS4_EncodeUTF7(
        Py_UNICODE* data,
        Py_ssize_t length,
        int encodeSetO,
        int encodeWhiteSpace,
        const(char)* errors
        );
/// ditto
alias PyUnicodeUCS4_EncodeUTF7 PyUnicode_EncodeUTF7;


/// _
PyObject* PyUnicodeUCS4_DecodeUTF8(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_DecodeUTF8 PyUnicode_DecodeUTF8;

/// _
PyObject* PyUnicodeUCS4_DecodeUTF8Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        Py_ssize_t* consumed
        );
/// ditto
alias PyUnicodeUCS4_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful;

/// _
PyObject* PyUnicodeUCS4_AsUTF8String(PyObject* unicode);
/// ditto
alias PyUnicodeUCS4_AsUTF8String PyUnicode_AsUTF8String;

/// _
PyObject* PyUnicodeUCS4_EncodeUTF8(
        Py_UNICODE* data,
        Py_ssize_t length,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_EncodeUTF8 PyUnicode_EncodeUTF8;


/**
Decodes length bytes from a UTF-16 encoded buffer string and returns
   the corresponding Unicode object.

   errors (if non-NULL) defines the error handling. It defaults
   to "strict".

   If byteorder is non-NULL, the decoder starts decoding using the
   given byte order:

   *byteorder == -1: little endian
   *byteorder == 0:  native order
   *byteorder == 1:  big endian

   In native mode, the first two bytes of the stream are checked for a
   BOM mark. If found, the BOM mark is analysed, the byte order
   adjusted and the BOM skipped.  In the other modes, no BOM mark
   interpretation is done. After completion, *byteorder is set to the
   current byte order at the end of input data.

   If byteorder is NULL, the codec starts in native order mode.

*/
PyObject* PyUnicodeUCS4_DecodeUTF16(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder);
/// ditto
alias PyUnicodeUCS4_DecodeUTF16 PyUnicode_DecodeUTF16;

/**
Params:
string = UTF-16 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
PyObject* PyUnicodeUCS4_DecodeUTF16Stateful(
        const(char)* string,
        Py_ssize_t length,
        const(char)* errors,
        int* byteorder,
        Py_ssize_t* consumed
        );
/// ditto
alias PyUnicodeUCS4_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful;

/** Returns a Python string using the UTF-16 encoding in native byte
   order. The string always starts with a BOM mark.  */
PyObject* PyUnicodeUCS4_AsUTF16String(PyObject* unicode);
/// ditto
alias PyUnicodeUCS4_AsUTF16String PyUnicode_AsUTF16String;

/** Returns a Python string object holding the UTF-16 encoded value of
   the Unicode data.
3909 3910 If byteorder is not 0, output is written according to the following 3911 byte order: 3912 3913 byteorder == -1: little endian 3914 byteorder == 0: native byte order (writes a BOM mark) 3915 byteorder == 1: big endian 3916 3917 If byteorder is 0, the output string will always start with the 3918 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 3919 prepended. 3920 3921 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 3922 UCS-2. This trick makes it possible to add full UTF-16 capabilities 3923 at a later point without compromising the APIs. 3924 3925 */ 3926 PyObject* PyUnicodeUCS4_EncodeUTF16( 3927 Py_UNICODE* data, 3928 Py_ssize_t length, 3929 const(char)* errors, 3930 int byteorder 3931 ); 3932 /// ditto 3933 3934 alias PyUnicodeUCS4_EncodeUTF16 PyUnicode_EncodeUTF16; 3935 3936 3937 /// _ 3938 PyObject* PyUnicodeUCS4_DecodeUnicodeEscape( 3939 const(char)* string, 3940 Py_ssize_t length, 3941 const(char)* errors); 3942 /// ditto 3943 3944 alias PyUnicodeUCS4_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 3945 3946 /// _ 3947 PyObject* PyUnicodeUCS4_AsUnicodeEscapeString( 3948 PyObject* unicode); 3949 /// ditto 3950 3951 alias PyUnicodeUCS4_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 3952 3953 /// _ 3954 PyObject* PyUnicodeUCS4_EncodeUnicodeEscape( 3955 Py_UNICODE* data, 3956 Py_ssize_t length); 3957 /// ditto 3958 3959 alias PyUnicodeUCS4_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 3960 3961 /** 3962 Params: 3963 string = Raw-Unicode-Escape encoded string 3964 length = size of string 3965 errors = error handling 3966 */ 3967 PyObject* PyUnicodeUCS4_DecodeRawUnicodeEscape( 3968 const(char)* string, 3969 Py_ssize_t length, 3970 const(char)* errors); 3971 /// ditto 3972 3973 alias PyUnicodeUCS4_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 3974 3975 /// _ 3976 PyObject* PyUnicodeUCS4_AsRawUnicodeEscapeString(PyObject* unicode); 3977 /// ditto 3978 3979 alias 
PyUnicodeUCS4_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 3980 3981 /// _ 3982 PyObject* PyUnicodeUCS4_EncodeRawUnicodeEscape( 3983 Py_UNICODE* data, Py_ssize_t length); 3984 /// ditto 3985 3986 alias PyUnicodeUCS4_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 3987 3988 3989 /// _ 3990 PyObject* _PyUnicodeUCS4_DecodeUnicodeInternal( 3991 const(char)* string, 3992 Py_ssize_t length, 3993 const(char)* errors); 3994 /// ditto 3995 3996 alias _PyUnicodeUCS4_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 3997 3998 3999 /** 4000 Params: 4001 string = Latin-1 encoded string 4002 length = size of string 4003 errors = error handling 4004 */ 4005 PyObject* PyUnicodeUCS4_DecodeLatin1( 4006 const(char)* string, 4007 Py_ssize_t length, 4008 const(char)* errors); 4009 /// ditto 4010 4011 alias PyUnicodeUCS4_DecodeLatin1 PyUnicode_DecodeLatin1; 4012 4013 /// _ 4014 PyObject* PyUnicodeUCS4_AsLatin1String(PyObject *unicode); 4015 /// ditto 4016 4017 alias PyUnicodeUCS4_AsLatin1String PyUnicode_AsLatin1String; 4018 4019 /** 4020 Params: 4021 data = Unicode char buffer 4022 length = Number of Py_UNICODE chars to encode 4023 errors = error handling 4024 */ 4025 PyObject* PyUnicodeUCS4_EncodeLatin1( 4026 Py_UNICODE* data, 4027 Py_ssize_t length, 4028 const(char)* errors); 4029 /// ditto 4030 4031 alias PyUnicodeUCS4_EncodeLatin1 PyUnicode_EncodeLatin1; 4032 4033 4034 /** 4035 Params: 4036 data = Unicode char buffer 4037 length = Number of Py_UNICODE chars to encode 4038 errors = error handling 4039 */ 4040 PyObject* PyUnicodeUCS4_DecodeASCII( 4041 const(char)* string, 4042 Py_ssize_t length, 4043 const(char)* errors); 4044 /// ditto 4045 4046 alias PyUnicodeUCS4_DecodeASCII PyUnicode_DecodeASCII; 4047 4048 /// _ 4049 PyObject* PyUnicodeUCS4_AsASCIIString(PyObject *unicode); 4050 /// ditto 4051 4052 alias PyUnicodeUCS4_AsASCIIString PyUnicode_AsASCIIString; 4053 4054 /** 4055 Params: 4056 data = Unicode char buffer 4057 length = Number of Py_UNICODE 
chars to encode 4058 errors = error handling 4059 */ 4060 PyObject* PyUnicodeUCS4_EncodeASCII( 4061 Py_UNICODE* data, 4062 Py_ssize_t length, 4063 const(char)* errors); 4064 /// ditto 4065 4066 alias PyUnicodeUCS4_EncodeASCII PyUnicode_EncodeASCII; 4067 4068 4069 /** 4070 Params: 4071 string = Encoded string 4072 length = size of string 4073 mapping = character mapping (char ordinal -> unicode ordinal) 4074 errors = error handling 4075 */ 4076 PyObject* PyUnicodeUCS4_DecodeCharmap( 4077 const(char)* string, 4078 Py_ssize_t length, 4079 PyObject* mapping, 4080 const(char)* errors 4081 ); 4082 /// ditto 4083 4084 alias PyUnicodeUCS4_DecodeCharmap PyUnicode_DecodeCharmap; 4085 4086 /** 4087 Params: 4088 unicode = Unicode object 4089 mapping = character mapping (unicode ordinal -> char ordinal) 4090 */ 4091 PyObject* PyUnicodeUCS4_AsCharmapString( 4092 PyObject* unicode, 4093 PyObject* mapping); 4094 /// ditto 4095 4096 alias PyUnicodeUCS4_AsCharmapString PyUnicode_AsCharmapString; 4097 4098 /** 4099 Params: 4100 data = Unicode char buffer 4101 length = Number of Py_UNICODE chars to encode 4102 mapping = character mapping (unicode ordinal -> char ordinal) 4103 errors = error handling 4104 */ 4105 PyObject* PyUnicodeUCS4_EncodeCharmap( 4106 Py_UNICODE* data, 4107 Py_ssize_t length, 4108 PyObject* mapping, 4109 const(char)* errors 4110 ); 4111 /// ditto 4112 4113 alias PyUnicodeUCS4_EncodeCharmap PyUnicode_EncodeCharmap; 4114 4115 /** Translate a Py_UNICODE buffer of the given length by applying a 4116 character mapping table to it and return the resulting Unicode 4117 object. 4118 4119 The mapping table must map Unicode ordinal integers to Unicode 4120 ordinal integers or None (causing deletion of the character). 4121 4122 Mapping tables may be dictionaries or sequences. Unmapped character 4123 ordinals (ones which cause a LookupError) are left untouched and 4124 are copied as-is. 
*/
PyObject* PyUnicodeUCS4_TranslateCharmap(
        Py_UNICODE* data,
        Py_ssize_t length,
        PyObject* table,
        const(char)* errors
        );
/// ditto
alias PyUnicodeUCS4_TranslateCharmap PyUnicode_TranslateCharmap;


version (Windows) {
    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_DecodeMBCS(
            const(char)* string,
            Py_ssize_t length,
            const(char)* errors);
    /// ditto
    alias PyUnicodeUCS4_DecodeMBCS PyUnicode_DecodeMBCS;

    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_AsMBCSString(PyObject* unicode);
    /// ditto
    alias PyUnicodeUCS4_AsMBCSString PyUnicode_AsMBCSString;

    /// Availability: Windows only
    PyObject* PyUnicodeUCS4_EncodeMBCS(
            Py_UNICODE* data,
            Py_ssize_t length,
            const(char)* errors);
    /// ditto
    alias PyUnicodeUCS4_EncodeMBCS PyUnicode_EncodeMBCS;

}
/** Takes a Unicode string holding a decimal value and writes it into
   an output buffer using standard ASCII digit codes.

   The output buffer has to provide at least length+1 bytes of storage
   area. The output string is 0-terminated.

   The encoder converts whitespace to ' ', decimal characters to their
   corresponding ASCII digit and all other Latin-1 characters except
   \0 as-is. Characters outside this range (Unicode ordinals 1-256)
   are treated as errors. This includes embedded NULL bytes.

   Error handling is defined by the errors argument:

      NULL or "strict": raise a ValueError
      "ignore": ignore the wrong characters (these are not copied to the
                output buffer)
      "replace": replaces illegal characters with '?'

   Returns 0 on success, -1 on failure.
*/
int PyUnicodeUCS4_EncodeDecimal(
        Py_UNICODE* s,
        Py_ssize_t length,
        char* output,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_EncodeDecimal PyUnicode_EncodeDecimal;


/** Concat two strings giving a new Unicode string. */
PyObject* PyUnicodeUCS4_Concat(
        PyObject* left,
        PyObject* right);
/// ditto
alias PyUnicodeUCS4_Concat PyUnicode_Concat;


version(Python_3_0_Or_Later) {
    /** Concat two strings and put the result in *pleft
       (sets *pleft to NULL on error)
    Params:
    pleft = Pointer to left string
    right = Right string
     */
    /// Availability: 3.*
    void PyUnicodeUCS4_Append(
            PyObject** pleft,
            PyObject* right
            );
    /// ditto
    alias PyUnicodeUCS4_Append PyUnicode_Append;


    /** Concat two strings, put the result in *pleft and drop the right object
       (sets *pleft to NULL on error)
    Params:
    pleft = Pointer to left string
    right = Right string (dropped by this call)
     */
    /// Availability: 3.*
    void PyUnicodeUCS4_AppendAndDel(
            PyObject** pleft,
            PyObject* right
            );
    /// ditto
    alias PyUnicodeUCS4_AppendAndDel PyUnicode_AppendAndDel;

}

/** Split a string giving a list of Unicode strings.

   If sep is NULL, splitting will be done at all whitespace
   substrings. Otherwise, splits occur at the given separator.

   At most maxsplit splits will be done. If negative, no limit is set.

   Separators are not included in the resulting list.

*/
PyObject* PyUnicodeUCS4_Split(
        PyObject* s,
        PyObject* sep,
        Py_ssize_t maxsplit);
/// ditto
alias PyUnicodeUCS4_Split PyUnicode_Split;


/** Ditto PyUnicode_Split, but split at line breaks.

   CRLF is considered to be one line break. Line breaks are not
   included in the resulting list.
*/
PyObject* PyUnicodeUCS4_Splitlines(
        PyObject* s,
        int keepends);
/// ditto
alias PyUnicodeUCS4_Splitlines PyUnicode_Splitlines;


version(Python_2_5_Or_Later) {
    /** Partition a string using a given separator. */
    /// Availability: >= 2.5
    PyObject* PyUnicodeUCS4_Partition(
            PyObject* s,
            PyObject* sep
            );
    /// ditto
    alias PyUnicodeUCS4_Partition PyUnicode_Partition;


    /** Partition a string using a given separator, searching from the end
      of the string. */
    /// Availability: >= 2.5
    PyObject* PyUnicodeUCS4_RPartition(
            PyObject* s,
            PyObject* sep
            );
    /// ditto
    alias PyUnicodeUCS4_RPartition PyUnicode_RPartition;

}

/** Split a string giving a list of Unicode strings.

   If sep is NULL, splitting will be done at all whitespace
   substrings. Otherwise, splits occur at the given separator.

   At most maxsplit splits will be done. But unlike PyUnicode_Split
   PyUnicode_RSplit splits from the end of the string. If negative,
   no limit is set.

   Separators are not included in the resulting list.

*/
PyObject* PyUnicodeUCS4_RSplit(
        PyObject* s,
        PyObject* sep,
        Py_ssize_t maxsplit);
/// ditto
alias PyUnicodeUCS4_RSplit PyUnicode_RSplit;


/** Translate a string by applying a character mapping table to it and
   return the resulting Unicode object.

   The mapping table must map Unicode ordinal integers to Unicode
   ordinal integers or None (causing deletion of the character).

   Mapping tables may be dictionaries or sequences. Unmapped character
   ordinals (ones which cause a LookupError) are left untouched and
   are copied as-is.
*/
PyObject* PyUnicodeUCS4_Translate(
        PyObject* str,
        PyObject* table,
        const(char)* errors);
/// ditto
alias PyUnicodeUCS4_Translate PyUnicode_Translate;


/** Join a sequence of strings using the given separator and return
   the resulting Unicode string. */
PyObject* PyUnicodeUCS4_Join(
        PyObject* separator,
        PyObject* seq);
/// ditto
alias PyUnicodeUCS4_Join PyUnicode_Join;


/** Return 1 if substr matches str[start:end] at the given tail end, 0
   otherwise. */
Py_ssize_t PyUnicodeUCS4_Tailmatch(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction
        );
/// ditto
alias PyUnicodeUCS4_Tailmatch PyUnicode_Tailmatch;


/** Return the first position of substr in str[start:end] using the
   given search direction or -1 if not found. -2 is returned in case
   an error occurred and an exception is set. */
Py_ssize_t PyUnicodeUCS4_Find(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end,
        int direction
        );
/// ditto
alias PyUnicodeUCS4_Find PyUnicode_Find;


/** Count the number of occurrences of substr in str[start:end]. */
Py_ssize_t PyUnicodeUCS4_Count(
        PyObject* str,
        PyObject* substr,
        Py_ssize_t start,
        Py_ssize_t end);
/// ditto
alias PyUnicodeUCS4_Count PyUnicode_Count;


/** Replace at most maxcount occurrences of substr in str with replstr
   and return the resulting Unicode object. */
PyObject* PyUnicodeUCS4_Replace(
        PyObject* str,
        PyObject* substr,
        PyObject* replstr,
        Py_ssize_t maxcount
        );
/// ditto
alias PyUnicodeUCS4_Replace PyUnicode_Replace;


/** Compare two strings and return -1, 0, 1 for less than, equal,
   greater than resp.
*/
int PyUnicodeUCS4_Compare(PyObject* left, PyObject* right);
/// ditto
alias PyUnicodeUCS4_Compare PyUnicode_Compare;

version(Python_3_0_Or_Later) {
    /** Compare two strings and return -1, 0, 1 for less than, equal,
      greater than resp.
    Params:
    left = Left string
    right = ASCII-encoded string
     */
    /// Availability: 3.*
    int PyUnicodeUCS4_CompareWithASCIIString(
            PyObject* left,
            const(char)* right
            );
    /// ditto
    alias PyUnicodeUCS4_CompareWithASCIIString PyUnicode_CompareWithASCIIString;

}

version(Python_2_5_Or_Later) {
    /** Rich compare two strings and return one of the following:

      - NULL in case an exception was raised
      - Py_True or Py_False for successful comparisons
      - Py_NotImplemented in case the type combination is unknown

      Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
      case the conversion of the arguments to Unicode fails with a
      UnicodeDecodeError.

      Possible values for op:

      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE

     */
    /// Availability: >= 2.5
    PyObject* PyUnicodeUCS4_RichCompare(
            PyObject* left,
            PyObject* right,
            int op
            );
    /// ditto
    alias PyUnicodeUCS4_RichCompare PyUnicode_RichCompare;

}

/** Apply an argument tuple or dictionary to a format string and return
   the resulting Unicode string. */
PyObject* PyUnicodeUCS4_Format(PyObject* format, PyObject* args);
/// ditto
alias PyUnicodeUCS4_Format PyUnicode_Format;


/** Checks whether element is contained in container and return 1/0
   accordingly.

   element has to coerce to a one-element Unicode string. -1 is
   returned in case of an error.
*/
int PyUnicodeUCS4_Contains(PyObject* container, PyObject* element);
/// ditto
alias PyUnicodeUCS4_Contains PyUnicode_Contains;


version(Python_3_0_Or_Later) {
    /** Checks whether argument is a valid identifier. */
    /// Availability: 3.*
    int PyUnicodeUCS4_IsIdentifier(PyObject* s);
    /// ditto
    alias PyUnicodeUCS4_IsIdentifier PyUnicode_IsIdentifier;

}


/// _
int _PyUnicodeUCS4_IsLowercase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsLowercase _PyUnicode_IsLowercase;

/// _
int _PyUnicodeUCS4_IsUppercase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsUppercase _PyUnicode_IsUppercase;

/// _
int _PyUnicodeUCS4_IsTitlecase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsTitlecase _PyUnicode_IsTitlecase;

/// _
int _PyUnicodeUCS4_IsWhitespace(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsWhitespace _PyUnicode_IsWhitespace;

/// _
int _PyUnicodeUCS4_IsLinebreak(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsLinebreak _PyUnicode_IsLinebreak;

/// _
Py_UNICODE _PyUnicodeUCS4_ToLowercase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToLowercase _PyUnicode_ToLowercase;

/// _
Py_UNICODE _PyUnicodeUCS4_ToUppercase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToUppercase _PyUnicode_ToUppercase;

/// _
Py_UNICODE _PyUnicodeUCS4_ToTitlecase(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToTitlecase _PyUnicode_ToTitlecase;

/// _
int _PyUnicodeUCS4_ToDecimalDigit(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToDecimalDigit _PyUnicode_ToDecimalDigit;

/// _
int _PyUnicodeUCS4_ToDigit(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToDigit _PyUnicode_ToDigit;

/// _
double _PyUnicodeUCS4_ToNumeric(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_ToNumeric _PyUnicode_ToNumeric;

/// _
int _PyUnicodeUCS4_IsDecimalDigit(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsDecimalDigit _PyUnicode_IsDecimalDigit;

/// _
int _PyUnicodeUCS4_IsDigit(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsDigit _PyUnicode_IsDigit;

/// _
int _PyUnicodeUCS4_IsNumeric(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsNumeric _PyUnicode_IsNumeric;

/// _
int _PyUnicodeUCS4_IsAlpha(Py_UNICODE ch);
/// ditto
alias _PyUnicodeUCS4_IsAlpha _PyUnicode_IsAlpha;

}
version(Python_3_0_Or_Later) {
    /// Availability: 3.*
    size_t Py_UNICODE_strlen(const(Py_UNICODE)* u);

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strcpy(Py_UNICODE* s1, const(Py_UNICODE)* s2);

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strcat(Py_UNICODE* s1, const(Py_UNICODE)* s2);
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strncpy(
            Py_UNICODE* s1,
            const(Py_UNICODE)* s2,
            size_t n);

    /// Availability: 3.*
    int Py_UNICODE_strcmp(
            const(Py_UNICODE)* s1,
            const(Py_UNICODE)* s2
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        int Py_UNICODE_strncmp(
                const(Py_UNICODE)* s1,
                const(Py_UNICODE)* s2,
                size_t n
                );
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strchr(
            const(Py_UNICODE)* s,
            Py_UNICODE c
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strrchr(
                const(Py_UNICODE)* s,
                Py_UNICODE c
                );
    }

    version(Python_3_5_Or_Later) {
        /// Availability: >= 3.5
        PyObject* _PyUnicode_FormatLong(PyObject*, int, int, int);
    }

    version(Python_3_2_Or_Later) {
4625 /** Create a copy of a unicode string ending with a nul character. Return NULL 4626 and raise a MemoryError exception on memory allocation failure, otherwise 4627 return a new allocated buffer (use PyMem_Free() to free the buffer). */ 4628 /// Availability: >= 3.2 4629 4630 Py_UNICODE* PyUnicode_AsUnicodeCopy( 4631 PyObject* unicode 4632 ); 4633 } 4634 } 4635 4636 4637 /// _ 4638 int _PyUnicode_IsTitlecase( 4639 Py_UCS4 ch /* Unicode character */ 4640 ); 4641 4642 /// _ 4643 int _PyUnicode_IsXidStart( 4644 Py_UCS4 ch /* Unicode character */ 4645 ); 4646 /** Externally visible for str.strip(unicode) */ 4647 PyObject* _PyUnicode_XStrip(PyUnicodeObject* self, int striptype, 4648 PyObject *sepobj 4649 ); 4650 version(Python_3_0_Or_Later) { 4651 version(Python_3_2_Or_Later) { 4652 /** Using the current locale, insert the thousands grouping 4653 into the string pointed to by buffer. For the argument descriptions, 4654 see Objects/stringlib/localeutil.h */ 4655 /// Availability: >= 3.2 4656 Py_ssize_t _PyUnicode_InsertThousandsGroupingLocale( 4657 Py_UNICODE* buffer, 4658 Py_ssize_t n_buffer, 4659 Py_UNICODE* digits, 4660 Py_ssize_t n_digits, 4661 Py_ssize_t min_width); 4662 } 4663 4664 /** Using explicit passed-in values, insert the thousands grouping 4665 into the string pointed to by buffer. 
For the argument descriptions, 4666 see Objects/stringlib/localeutil.h */ 4667 /// Availability: 3.* 4668 Py_ssize_t _PyUnicode_InsertThousandsGrouping( 4669 Py_UNICODE* buffer, 4670 Py_ssize_t n_buffer, 4671 Py_UNICODE* digits, 4672 Py_ssize_t n_digits, 4673 Py_ssize_t min_width, 4674 const(char)* grouping, 4675 const(char)* thousands_sep); 4676 } 4677 4678 version(Python_3_2_Or_Later) { 4679 /// Availability: >= 3.2 4680 PyObject* PyUnicode_TransformDecimalToASCII( 4681 Py_UNICODE *s, /* Unicode buffer */ 4682 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 4683 ); 4684 /* --- File system encoding ---------------------------------------------- */ 4685 4686 /** ParseTuple converter: encode str objects to bytes using 4687 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 4688 /// Availability: >= 3.2 4689 int PyUnicode_FSConverter(PyObject*, void*); 4690 4691 /** ParseTuple converter: decode bytes objects to unicode using 4692 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 4693 /// Availability: >= 3.2 4694 int PyUnicode_FSDecoder(PyObject*, void*); 4695 4696 /** Decode a null-terminated string using Py_FileSystemDefaultEncoding 4697 and the "surrogateescape" error handler. 4698 4699 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4700 encoding. 4701 4702 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 4703 */ 4704 /// Availability: >= 3.2 4705 PyObject* PyUnicode_DecodeFSDefault( 4706 const(char)* s /* encoded string */ 4707 ); 4708 4709 /** Decode a string using Py_FileSystemDefaultEncoding 4710 and the "surrogateescape" error handler. 4711 4712 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4713 encoding. 
4714 */ 4715 /// Availability: >= 3.2 4716 PyObject* PyUnicode_DecodeFSDefaultAndSize( 4717 const(char)* s, /* encoded string */ 4718 Py_ssize_t size /* size */ 4719 ); 4720 4721 /** Encode a Unicode object to Py_FileSystemDefaultEncoding with the 4722 "surrogateescape" error handler, and return bytes. 4723 4724 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4725 encoding. 4726 */ 4727 /// Availability: >= 3.2 4728 PyObject* PyUnicode_EncodeFSDefault( 4729 PyObject* unicode 4730 ); 4731 } 4732 4733 /* 4734 alias _PyUnicode_IsWhitespace Py_UNICODE_ISSPACE; 4735 alias _PyUnicode_IsLowercase Py_UNICODE_ISLOWER; 4736 alias _PyUnicode_IsUppercase Py_UNICODE_ISUPPER; 4737 alias _PyUnicode_IsTitlecase Py_UNICODE_ISTITLE; 4738 alias _PyUnicode_IsLinebreak Py_UNICODE_ISLINEBREAK; 4739 alias _PyUnicode_ToLowercase Py_UNICODE_TOLOWER; 4740 alias _PyUnicode_ToUppercase Py_UNICODE_TOUPPER; 4741 alias _PyUnicode_ToTitlecase Py_UNICODE_TOTITLE; 4742 alias _PyUnicode_IsDecimalDigit Py_UNICODE_ISDECIMAL; 4743 alias _PyUnicode_IsDigit Py_UNICODE_ISDIGIT; 4744 alias _PyUnicode_IsNumeric Py_UNICODE_ISNUMERIC; 4745 alias _PyUnicode_ToDecimalDigit Py_UNICODE_TODECIMAL; 4746 alias _PyUnicode_ToDigit Py_UNICODE_TODIGIT; 4747 alias _PyUnicode_ToNumeric Py_UNICODE_TONUMERIC; 4748 alias _PyUnicode_IsAlpha Py_UNICODE_ISALPHA; 4749 */ 4750 4751 /// _ 4752 int Py_UNICODE_ISALNUM()(Py_UNICODE ch) { 4753 return ( 4754 Py_UNICODE_ISALPHA(ch) 4755 || Py_UNICODE_ISDECIMAL(ch) 4756 || Py_UNICODE_ISDIGIT(ch) 4757 || Py_UNICODE_ISNUMERIC(ch) 4758 ); 4759 } 4760 4761 /// _ 4762 void Py_UNICODE_COPY()(void* target, void* source, size_t length) { 4763 memcpy(target, source, cast(uint)(length* Py_UNICODE.sizeof)); 4764 } 4765 4766 /// _ 4767 void Py_UNICODE_FILL()(Py_UNICODE* target, Py_UNICODE value, size_t length) { 4768 for (size_t i = 0; i < length; i++) { 4769 target[i] = value; 4770 } 4771 } 4772 4773 /// _ 4774 int Py_UNICODE_MATCH()(PyUnicodeObject* string, size_t offset, 4775 
PyUnicodeObject* substring 4776 ) 4777 { 4778 return ( 4779 (*(string.str + offset) == *(substring.str)) 4780 && !memcmp(string.str + offset, substring.str, 4781 substring.length * Py_UNICODE.sizeof 4782 ) 4783 ); 4784 } 4785 4786