/**
  Mirror _unicodeobject.h

  Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
  produce different external names and thus cause import errors in
  case Python interpreters and extensions with mixed compiled in
  Unicode width assumptions are combined.
 */
module deimos.python.unicodeobject;

import core.stdc.stdarg;
import core.stdc.string;
import core.stdc.stddef : wchar_t;

import deimos.python.pyport;
import deimos.python.object;

extern(C):
// Python-header-file: Include/unicodeobject.h:

/** Py_UNICODE is the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode
   type. */
version (Python_Unicode_UCS2) {
    version (Windows) {
        alias wchar_t Py_UNICODE;
    } else {
        alias ushort Py_UNICODE;
    }
} else {
    alias uint Py_UNICODE;
}
/** Fixed-width code unit types used by the 1-, 2- and 4-byte string kinds.

   Py_UCS4 is deliberately NOT tied to Py_UNICODE: on a UCS-2 build
   Py_UNICODE is only 16 bits wide, while Py_UCS4 has to be able to hold
   any code point (it backs the `ucs4` member of PyUnicodeObject_data).
   On UCS-4 builds this is the same type as before (uint). */
alias uint Py_UCS4;
/// ditto
alias ubyte Py_UCS1;
/// ditto
alias ushort Py_UCS2;

version(Python_3_4_Or_Later) {
    /** There are 4 forms of Unicode strings:
       - compact ascii:
         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string are the data)
       - compact:
         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)
       - legacy string, not ready:
         * structure =
PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0
       - legacy string, ready:
         * structure = PyUnicodeObject structure
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.
       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.
       See also _PyUnicode_CheckConsistency().
       Availability >= 3.4
     */
    struct PyASCIIObject {
        mixin PyObject_HEAD;
        /** Number of code points in the string */
        Py_ssize_t length;
        /** Hash value; -1 if not set */
        Py_hash_t hash;
        /// Packed state word. NOTE(review): presumably carries the
        /// interned/kind/compact/ascii/ready values described in the layout
        /// comment above; confirm the bit layout against the C header.
        int state;
        /** wchar_t representation (null-terminated) */
        wchar_t* wstr;
    }

    /// Layout used for compact, non-ASCII strings: the PyASCIIObject header
    /// followed by the UTF-8 and wstr bookkeeping fields.
    /// Availability >= 3.4
    struct PyCompactUnicodeObject {
        /// Common header shared with the compact-ASCII form.
        PyASCIIObject _base;
        /// Length of `utf8`; 0 if `utf8` is NULL (see layout comment above).
        Py_ssize_t utf8_length;
        /// UTF-8 representation, or NULL.
        char* utf8;
        /// Length of the wstr representation; 0 if wstr is NULL
        /// (see layout comment above).
        Py_ssize_t wstr_length;
    }

    /**
      subclass of PyObject.

      Layout used for legacy strings: the compact header followed by a
      pointer to the separately held character data.
      */
    struct PyUnicodeObject {
        PyCompactUnicodeObject _base;
        PyUnicodeObject_data data;
    }

    /// Pointer to the character data of a legacy string, viewed with the
    /// element type that corresponds to the 1-, 2- or 4-byte kind.
    union PyUnicodeObject_data {
        void* any;
        Py_UCS1* latin1;
        Py_UCS2* ucs2;
        Py_UCS4* ucs4;
    }
}else{
    /**
      subclass of PyObject.
      */
    struct PyUnicodeObject {
        mixin PyObject_HEAD;
        /** Length of raw Unicode data in buffer */
        Py_ssize_t length;
        /** Raw Unicode buffer */
        Py_UNICODE* str;
        /** Hash value; -1 if not set */
        C_long hash;
        /** (Default) Encoded version as Python
           string, or NULL; this is used for
           implementing the buffer protocol */
        PyObject* defenc;
    }
}

/// _
mixin(PyAPI_DATA!"PyTypeObject PyUnicode_Type");

// D translations of C macros:
// They are written as templates with an empty parameter list, so each body
// is only compiled when the function is actually used.
/** Fast access macros */
int PyUnicode_Check()(PyObject* op) {
    // Delegates the type test to PyObject_TypeCheck against PyUnicode_Type.
    return PyObject_TypeCheck(op, &PyUnicode_Type);
}
/// ditto
int PyUnicode_CheckExact()(PyObject* op) {
    // Exact match only: compares the object's type pointer to PyUnicode_Type.
    return Py_TYPE(op) == &PyUnicode_Type;
}

// NOTE(review): the four accessors below read `op.length` / `op.str`, which
// are direct members only of the pre-3.4 PyUnicodeObject declared above. The
// Python_3_4_Or_Later struct keeps `length` inside `_base._base` and has no
// `str` member, so these look unusable on 3.4+ -- confirm before relying on
// them there.
/// ditto
size_t PyUnicode_GET_SIZE()(PyUnicodeObject* op) {
    return op.length;
}
/// ditto
size_t PyUnicode_GET_DATA_SIZE()(PyUnicodeObject* op) {
    // Size in bytes: element count times the width of one Py_UNICODE.
    return op.length * Py_UNICODE.sizeof;
}
/// ditto
Py_UNICODE* PyUnicode_AS_UNICODE()(PyUnicodeObject* op) {
    return op.str;
}
/// ditto
const(char)* PyUnicode_AS_DATA()(PyUnicodeObject* op) {
    // Same buffer as PyUnicode_AS_UNICODE, reinterpreted as raw bytes.
    return cast(const(char)*) op.str;
}

/** This Unicode character will be used as replacement character during
  decoding if the errors argument is set to "replace". Note: the
  Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
  Unicode 3.0.
*/ 192 enum Py_UNICODE Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD; 193 194 version(Python_3_3_Or_Later) { 195 enum PyUnicode_ = "PyUnicode_"; 196 }else version(Python_Unicode_UCS2) { 197 enum PyUnicode_ = "PyUnicodeUCS2_"; 198 }else{ 199 enum PyUnicode_ = "PyUnicodeUCS4_"; 200 } 201 202 /* 203 this function takes defs PyUnicode_XX and transforms them to 204 PyUnicodeUCS4_XX(); 205 alias PyUnicodeUCS4_XX PyUnicode_XX; 206 207 */ 208 string substitute_and_alias()(string code) { 209 import std.algorithm; 210 import std.array; 211 string[] newcodes; 212 LOOP: 213 while(true) { 214 if(startsWith(code,"/*")) { 215 size_t comm_end_index = countUntil(code[2 .. $], "*/"); 216 if(comm_end_index == -1) break; 217 newcodes ~= code[0 .. comm_end_index]; 218 code = code[comm_end_index .. $]; 219 continue; 220 } 221 if(!(startsWith(code,"PyUnicode_") || startsWith(code,"_PyUnicode"))) { 222 size_t index = 0; 223 while(index < code.length) { 224 if(code[index] == '_') { 225 if(startsWith(code[index .. $], "_PyUnicode_")) { 226 break; 227 } 228 }else if(code[index] == 'P') { 229 if(startsWith(code[index .. $], "PyUnicode_")) { 230 break; 231 } 232 }else if(code[index] == '/') { 233 if(startsWith(code[index .. $], "/*")) { 234 break; 235 } 236 } 237 index++; 238 } 239 if(index == code.length) break; 240 newcodes ~= code[0 .. index]; 241 code = code[index .. $]; 242 continue; 243 } 244 size_t end_index = countUntil(code, "("); 245 if(end_index == -1) break; 246 string alias_name = code[0 .. end_index]; 247 string func_name = replace(alias_name, "PyUnicode_", PyUnicode_); 248 size_t index0 = end_index+1; 249 int parencount = 1; 250 while(parencount && index0 < code.length) { 251 if(startsWith(code[index0 .. $], "/*")) { 252 size_t comm_end_index = countUntil(code[index0+2 .. 
$], "*/"); 253 if(comm_end_index == -1) break LOOP; 254 index0 += comm_end_index; 255 continue; 256 }else if(code[index0] == '(') { 257 parencount++; 258 index0++; 259 }else if(code[index0] == ')') { 260 parencount--; 261 index0++; 262 }else{ 263 index0++; 264 } 265 } 266 size_t semi = countUntil(code[index0 .. $], ";"); 267 if(semi == -1) break; 268 index0 += semi+1; 269 270 string alias_line = "\nalias " ~ func_name ~ " " ~ alias_name ~ ";\n"; 271 newcodes ~= func_name; 272 newcodes ~= code[end_index .. index0]; 273 newcodes ~= "\n /// ditto \n"; 274 newcodes ~= alias_line; 275 276 code = code[index0 .. $]; 277 } 278 279 string newcode; 280 foreach(c; newcodes) { 281 newcode ~= c; 282 } 283 return newcode; 284 } 285 286 enum string unicode_funs = q{ 287 version(Python_2_6_Or_Later) { 288 289 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 290 size. 291 292 u may be NULL which causes the contents to be undefined. It is the 293 user's responsibility to fill in the needed data afterwards. Note 294 that modifying the Unicode object contents after construction is 295 only allowed if u was set to NULL. 296 297 The buffer is copied into the new object. 
*/ 298 /// Availability: >= 2.6 299 PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 300 301 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 302 /// Availability: >= 2.6 303 PyObject* PyUnicode_FromStringAndSize( 304 const(char)*u, /* char buffer */ 305 Py_ssize_t size /* size of buffer */ 306 ); 307 308 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 309 Latin-1 encoded bytes */ 310 /// Availability: >= 2.6 311 PyObject* PyUnicode_FromString( 312 const(char)*u /* string */ 313 ); 314 /// Availability: >= 2.6 315 PyObject* PyUnicode_FromFormatV(const(char)*, va_list); 316 /// Availability: >= 2.6 317 PyObject* PyUnicode_FromFormat(const(char)*, ...); 318 319 /** Format the object based on the format_spec, as defined in PEP 3101 320 (Advanced String Formatting). */ 321 /// Availability: >= 2.6 322 PyObject* _PyUnicode_FormatAdvanced(PyObject *obj, 323 Py_UNICODE *format_spec, 324 Py_ssize_t format_spec_len); 325 /// Availability: >= 2.6 326 int PyUnicode_ClearFreeList(); 327 /** 328 Params: 329 string = UTF-7 encoded string 330 length = size of string 331 errors = error handling 332 consumed = bytes consumed 333 */ 334 /// Availability: >= 2.6 335 PyObject* PyUnicode_DecodeUTF7Stateful( 336 const(char)* string, 337 Py_ssize_t length, 338 const(char)*errors, 339 Py_ssize_t *consumed 340 ); 341 /** 342 Params: 343 string = UTF-32 encoded string 344 length = size of string 345 errors = error handling 346 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 347 */ 348 /// Availability: >= 2.6 349 PyObject* PyUnicode_DecodeUTF32( 350 const(char)* string, 351 Py_ssize_t length, 352 const(char)*errors, 353 int *byteorder 354 ); 355 356 /** 357 Params: 358 string = UTF-32 encoded string 359 length = size of string 360 errors = error handling 361 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 362 */ 363 /// Availability: >= 2.6 364 PyObject* 
PyUnicode_DecodeUTF32Stateful( 365 const(char)*string, 366 Py_ssize_t length, 367 const(char)*errors, 368 int *byteorder, 369 Py_ssize_t *consumed 370 ); 371 /** Returns a Python string using the UTF-32 encoding in native byte 372 order. The string always starts with a BOM mark. */ 373 /// Availability: >= 2.6 374 375 PyObject* PyUnicode_AsUTF32String( 376 PyObject *unicode 377 ); 378 379 /** Returns a Python string object holding the UTF-32 encoded value of 380 the Unicode data. 381 382 If byteorder is not 0, output is written according to the following 383 byte order: 384 385 byteorder == -1: little endian 386 byteorder == 0: native byte order (writes a BOM mark) 387 byteorder == 1: big endian 388 389 If byteorder is 0, the output string will always start with the 390 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 391 prepended. 392 Params: 393 data = Unicode char buffer 394 length = number of Py_UNICODE chars to encode 395 errors = error handling 396 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 397 398 */ 399 /// Availability: >= 2.6 400 PyObject* PyUnicode_EncodeUTF32( 401 const Py_UNICODE *data, 402 Py_ssize_t length, 403 const(char)* errors, 404 int byteorder 405 ); 406 } 407 408 /** Return a read-only pointer to the Unicode object's internal 409 Py_UNICODE buffer. */ 410 Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode); 411 /** Get the length of the Unicode object. */ 412 Py_ssize_t PyUnicode_GetSize(PyObject* unicode); 413 414 /** Get the maximum ordinal for a Unicode character. */ 415 Py_UNICODE PyUnicode_GetMax(); 416 417 /** Resize an already allocated Unicode object to the new size length. 418 419 _*unicode is modified to point to the new (resized) object and 0 420 returned on success. 421 422 This API may only be called by the function which also called the 423 Unicode constructor. The refcount on the object must be 1. Otherwise, 424 an error is returned. 
425 426 Error handling is implemented as follows: an exception is set, -1 427 is returned and *unicode left untouched. 428 Params: 429 unicode = pointer to the new unicode object. 430 length = New length. 431 432 */ 433 int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length); 434 /** Coerce obj to a Unicode object and return a reference with 435 _*incremented* refcount. 436 437 Coercion is done in the following way: 438 439 1. String and other char buffer compatible objects are decoded 440 under the assumptions that they contain data using the current 441 default encoding. Decoding is done in "strict" mode. 442 443 2. All other objects (including Unicode objects) raise an 444 exception. 445 446 The API returns NULL in case of an error. The caller is responsible 447 for decref'ing the returned objects. 448 449 */ 450 PyObject* PyUnicode_FromEncodedObject( 451 PyObject* obj, 452 const(char)* encoding, 453 const(char)* errors); 454 455 /** Coerce obj to a Unicode object and return a reference with 456 _*incremented* refcount. 457 458 Unicode objects are passed back as-is (subclasses are converted to 459 true Unicode objects), all other objects are delegated to 460 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 461 using the default encoding as basis for decoding the object. 462 463 The API returns NULL in case of an error. The caller is responsible 464 for decref'ing the returned objects. 465 466 */ 467 PyObject* PyUnicode_FromObject(PyObject* obj); 468 469 /** Create a Unicode Object from the wchar_t buffer w of the given 470 size. 471 472 The buffer is copied into the new object. */ 473 PyObject* PyUnicode_FromWideChar(const(wchar_t)* w, Py_ssize_t size); 474 475 /** Copies the Unicode Object contents into the wchar_t buffer w. At 476 most size wchar_t characters are copied. 477 478 Note that the resulting wchar_t string may or may not be 479 0-terminated. 
It is the responsibility of the caller to make sure 480 that the wchar_t string is 0-terminated in case this is required by 481 the application. 482 483 Returns the number of wchar_t characters copied (excluding a 484 possibly trailing 0-termination character) or -1 in case of an 485 error. */ 486 Py_ssize_t PyUnicode_AsWideChar( 487 PyUnicodeObject* unicode, 488 const(wchar_t)* w, 489 Py_ssize_t size); 490 491 /** Create a Unicode Object from the given Unicode code point ordinal. 492 493 The ordinal must be in range(0x10000) on narrow Python builds 494 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 495 raised in case it is not. 496 497 */ 498 PyObject* PyUnicode_FromOrdinal(int ordinal); 499 500 /** Return a Python string holding the default encoded value of the 501 Unicode object. 502 503 The resulting string is cached in the Unicode object for subsequent 504 usage by this function. The cached version is needed to implement 505 the character buffer interface and will live (at least) as long as 506 the Unicode object itself. 507 508 The refcount of the string is *not* incremented. 509 510 _*** Exported for internal use by the interpreter only !!! *** 511 512 */ 513 PyObject* _PyUnicode_AsDefaultEncodedString(PyObject *, const(char)*); 514 515 /** Returns the currently active default encoding. 516 517 The default encoding is currently implemented as run-time settable 518 process global. This may change in future versions of the 519 interpreter to become a parameter which is managed on a per-thread 520 basis. 521 522 */ 523 const(char)* PyUnicode_GetDefaultEncoding(); 524 525 /** Sets the currently active default encoding. 526 527 Returns 0 on success, -1 in case of an error. 528 529 */ 530 int PyUnicode_SetDefaultEncoding(const(char)*encoding); 531 532 /** Create a Unicode object by decoding the encoded string s of the 533 given size. 
534 Params: 535 s = encoded string 536 size = size of buffer 537 encoding = encoding 538 errors = error handling 539 */ 540 PyObject* PyUnicode_Decode( 541 const(char)* s, 542 Py_ssize_t size, 543 const(char)* encoding, 544 const(char)* errors); 545 546 version(Python_3_6_Or_Later) { 547 /** Decode a Unicode object unicode and return the result as Python 548 object. */ 549 /// Deprecated in 3.6 550 deprecated("Deprecated in 3.6") 551 PyObject* PyUnicode_AsDecodedObject( 552 PyObject* unicode, 553 const(char)* encoding, 554 const(char)* errors 555 ); 556 /** Decode a Unicode object unicode and return the result as Unicode 557 object. */ 558 /// Availability: 3.* 559 560 /// Deprecated in 3.6 561 deprecated("Deprecated in 3.6") 562 PyObject* PyUnicode_AsDecodedUnicode( 563 PyObject* unicode, 564 const(char)* encoding, 565 const(char)* errors 566 ); 567 }else version(Python_3_0_Or_Later) { 568 /** Decode a Unicode object unicode and return the result as Python 569 object. */ 570 /// Availability: 3.* 571 PyObject* PyUnicode_AsDecodedObject( 572 PyObject* unicode, 573 const(char)* encoding, 574 const(char)* errors 575 ); 576 /** Decode a Unicode object unicode and return the result as Unicode 577 object. */ 578 /// Availability: 3.* 579 580 PyObject* PyUnicode_AsDecodedUnicode( 581 PyObject* unicode, 582 const(char)* encoding, 583 const(char)* errors 584 ); 585 } 586 587 /** Encodes a Py_UNICODE buffer of the given size and returns a 588 Python string object. 589 Params: 590 s = Unicode char buffer 591 size = number of Py_UNICODE chars to encode 592 encoding = encoding 593 errors = error handling 594 */ 595 PyObject* PyUnicode_Encode( 596 Py_UNICODE* s, 597 Py_ssize_t size, 598 const(char)* encoding, 599 const(char)* errors); 600 601 version(Python_3_6_Or_Later) { 602 /** Encodes a Unicode object and returns the result as Python object. 
603 */ 604 deprecated("Deprecated in 3.6") 605 PyObject* PyUnicode_AsEncodedObject( 606 PyObject* unicode, 607 const(char)* encoding, 608 const(char)* errors); 609 }else{ 610 /** Encodes a Unicode object and returns the result as Python object. 611 */ 612 PyObject* PyUnicode_AsEncodedObject( 613 PyObject* unicode, 614 const(char)* encoding, 615 const(char)* errors); 616 } 617 618 /** Encodes a Unicode object and returns the result as Python string 619 object. */ 620 PyObject* PyUnicode_AsEncodedString( 621 PyObject* unicode, 622 const(char)* encoding, 623 const(char)* errors); 624 625 version(Python_3_0_Or_Later) { 626 /** Encodes a Unicode object and returns the result as Unicode 627 object. */ 628 deprecated("Deprecated in 3.6") 629 PyObject* PyUnicode_AsEncodedUnicode( 630 PyObject* unicode, 631 const(char)* encoding, 632 const(char)* errors 633 ); 634 }else version(Python_3_0_Or_Later) { 635 /** Encodes a Unicode object and returns the result as Unicode 636 object. */ 637 /// Availability: >= 3.* 638 PyObject* PyUnicode_AsEncodedUnicode( 639 PyObject* unicode, 640 const(char)* encoding, 641 const(char)* errors 642 ); 643 } 644 645 /** 646 Params: 647 string = UTF-7 encoded string 648 length = size of string 649 errors = error handling 650 */ 651 PyObject* PyUnicode_DecodeUTF7( 652 const(char)* string, 653 Py_ssize_t length, 654 const(char)* errors); 655 656 /** 657 Params: 658 data = Unicode char buffer 659 length = number of Py_UNICODE chars to encode 660 base64SetO = Encode RFC2152 Set O characters in base64 661 base64WhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 662 errors = error handling 663 */ 664 PyObject* PyUnicode_EncodeUTF7( 665 Py_UNICODE* data, 666 Py_ssize_t length, 667 int encodeSetO, 668 int encodeWhiteSpace, 669 const(char)* errors 670 ); 671 672 /// _ 673 PyObject* PyUnicode_DecodeUTF8( 674 const(char)* string, 675 Py_ssize_t length, 676 const(char)* errors); 677 /// _ 678 PyObject* PyUnicode_DecodeUTF8Stateful( 679 const(char)* 
string, 680 Py_ssize_t length, 681 const(char)* errors, 682 Py_ssize_t* consumed 683 ); 684 /// _ 685 PyObject* PyUnicode_AsUTF8String(PyObject* unicode); 686 /// _ 687 PyObject* PyUnicode_EncodeUTF8( 688 Py_UNICODE* data, 689 Py_ssize_t length, 690 const(char) *errors); 691 692 /** Decodes length bytes from a UTF-16 encoded buffer string and returns 693 the corresponding Unicode object. 694 695 errors (if non-NULL) defines the error handling. It defaults 696 to "strict". 697 698 If byteorder is non-NULL, the decoder starts decoding using the 699 given byte order: 700 701 *byteorder == -1: little endian 702 *byteorder == 0: native order 703 *byteorder == 1: big endian 704 705 In native mode, the first two bytes of the stream are checked for a 706 BOM mark. If found, the BOM mark is analysed, the byte order 707 adjusted and the BOM skipped. In the other modes, no BOM mark 708 interpretation is done. After completion, *byteorder is set to the 709 current byte order at the end of input data. 710 711 If byteorder is NULL, the codec starts in native order mode. 712 713 */ 714 PyObject* PyUnicode_DecodeUTF16( 715 const(char)* string, 716 Py_ssize_t length, 717 const(char)* errors, 718 int* byteorder); 719 /** 720 Params: 721 string = UTF-16 encoded string 722 length = size of string 723 errors = error handling 724 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 725 consumed = bytes consumed 726 */ 727 PyObject* PyUnicode_DecodeUTF16Stateful( 728 const(char)* string, 729 Py_ssize_t length, 730 const(char)* errors, 731 int* byteorder, 732 Py_ssize_t* consumed 733 ); 734 /** Returns a Python string using the UTF-16 encoding in native byte 735 order. The string always starts with a BOM mark. */ 736 PyObject* PyUnicode_AsUTF16String(PyObject *unicode); 737 /** Returns a Python string object holding the UTF-16 encoded value of 738 the Unicode data. 
739 740 If byteorder is not 0, output is written according to the following 741 byte order: 742 743 byteorder == -1: little endian 744 byteorder == 0: native byte order (writes a BOM mark) 745 byteorder == 1: big endian 746 747 If byteorder is 0, the output string will always start with the 748 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 749 prepended. 750 751 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 752 UCS-2. This trick makes it possible to add full UTF-16 capabilities 753 at a later point without compromising the APIs. 754 755 */ 756 PyObject* PyUnicode_EncodeUTF16( 757 Py_UNICODE* data, 758 Py_ssize_t length, 759 const(char)* errors, 760 int byteorder 761 ); 762 763 /// _ 764 PyObject* PyUnicode_DecodeUnicodeEscape( 765 const(char)* string, 766 Py_ssize_t length, 767 const(char)* errors); 768 /// _ 769 PyObject* PyUnicode_AsUnicodeEscapeString( 770 PyObject* unicode); 771 /// _ 772 PyObject* PyUnicode_EncodeUnicodeEscape( 773 Py_UNICODE* data, 774 Py_ssize_t length); 775 /** 776 Params: 777 string = Raw-Unicode-Escape encoded string 778 length = size of string 779 errors = error handling 780 */ 781 PyObject* PyUnicode_DecodeRawUnicodeEscape( 782 const(char)* string, 783 Py_ssize_t length, 784 const(char)* errors); 785 /// _ 786 PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode); 787 /// _ 788 PyObject* PyUnicode_EncodeRawUnicodeEscape( 789 Py_UNICODE* data, Py_ssize_t length); 790 791 /// _ 792 PyObject* _PyUnicode_DecodeUnicodeInternal( 793 const(char)* string, 794 Py_ssize_t length, 795 const(char)* errors); 796 797 /** 798 Params: 799 string = Latin-1 encoded string 800 length = size of string 801 errors = error handling 802 */ 803 PyObject* PyUnicode_DecodeLatin1( 804 const(char)* string, 805 Py_ssize_t length, 806 const(char)* errors); 807 /// _ 808 PyObject* PyUnicode_AsLatin1String(PyObject *unicode); 809 /** 810 Params: 811 data = Unicode char buffer 812 length = Number of Py_UNICODE chars to 
encode 813 errors = error handling 814 */ 815 PyObject* PyUnicode_EncodeLatin1( 816 Py_UNICODE* data, 817 Py_ssize_t length, 818 const(char)* errors); 819 820 /** 821 Params: 822 string = ASCII encoded string 823 length = size of string 824 errors = error handling 825 */ 826 PyObject* PyUnicode_DecodeASCII( 827 const(char)* string, 828 Py_ssize_t length, 829 const(char)* errors); 830 /// _ 831 PyObject* PyUnicode_AsASCIIString(PyObject *unicode); 832 /** 833 Params: 834 data = Unicode char buffer 835 length = Number of Py_UNICODE chars to encode 836 errors = error handling 837 */ 838 PyObject* PyUnicode_EncodeASCII( 839 Py_UNICODE* data, 840 Py_ssize_t length, 841 const(char)* errors); 842 843 /** 844 Params: 845 string = Encoded string 846 length = size of string 847 mapping = character mapping (char ordinal -> unicode ordinal) 848 errors = error handling 849 */ 850 PyObject* PyUnicode_DecodeCharmap( 851 const(char)* string, 852 Py_ssize_t length, 853 PyObject* mapping, 854 const(char)* errors 855 ); 856 /** 857 Params: 858 unicode = Unicode object 859 mapping = character mapping (unicode ordinal -> char ordinal) 860 */ 861 PyObject* PyUnicode_AsCharmapString( 862 PyObject* unicode, 863 PyObject* mapping); 864 /** 865 Params: 866 data = Unicode char buffer 867 length = Number of Py_UNICODE chars to encode 868 mapping = character mapping (unicode ordinal -> char ordinal) 869 errors = error handling 870 */ 871 PyObject* PyUnicode_EncodeCharmap( 872 Py_UNICODE* data, 873 Py_ssize_t length, 874 PyObject* mapping, 875 const(char)* errors 876 ); 877 /** Translate a Py_UNICODE buffer of the given length by applying a 878 character mapping table to it and return the resulting Unicode 879 object. 880 881 The mapping table must map Unicode ordinal integers to Unicode 882 ordinal integers or None (causing deletion of the character). 883 884 Mapping tables may be dictionaries or sequences. 
Unmapped character 885 ordinals (ones which cause a LookupError) are left untouched and 886 are copied as-is. 887 888 */ 889 PyObject* PyUnicode_TranslateCharmap( 890 Py_UNICODE* data, 891 Py_ssize_t length, 892 PyObject* table, 893 const(char)* errors 894 ); 895 896 version (Windows) { 897 /// Availability: Windows only 898 PyObject* PyUnicode_DecodeMBCS( 899 const(char)* string, 900 Py_ssize_t length, 901 const(char)* errors); 902 /// Availability: Windows only 903 PyObject* PyUnicode_AsMBCSString(PyObject* unicode); 904 /// Availability: Windows only 905 PyObject* PyUnicode_EncodeMBCS( 906 Py_UNICODE* data, 907 Py_ssize_t length, 908 const(char)* errors); 909 } 910 /** Takes a Unicode string holding a decimal value and writes it into 911 an output buffer using standard ASCII digit codes. 912 913 The output buffer has to provide at least length+1 bytes of storage 914 area. The output string is 0-terminated. 915 916 The encoder converts whitespace to ' ', decimal characters to their 917 corresponding ASCII digit and all other Latin-1 characters except 918 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 919 are treated as errors. This includes embedded NULL bytes. 920 921 Error handling is defined by the errors argument: 922 923 NULL or "strict": raise a ValueError 924 "ignore": ignore the wrong characters (these are not copied to the 925 output buffer) 926 "replace": replaces illegal characters with '?' 927 928 Returns 0 on success, -1 on failure. 929 930 */ 931 int PyUnicode_EncodeDecimal( 932 Py_UNICODE* s, 933 Py_ssize_t length, 934 char* output, 935 const(char)* errors); 936 937 /** Concat two strings giving a new Unicode string. 
*/ 938 PyObject* PyUnicode_Concat( 939 PyObject* left, 940 PyObject* right); 941 942 version(Python_3_0_Or_Later) { 943 /** Concat two strings and put the result in *pleft 944 (sets *pleft to NULL on error) 945 Params: 946 pleft = Pointer to left string 947 right = Right string 948 */ 949 /// Availability: 3.* 950 951 void PyUnicode_Append( 952 PyObject** pleft, 953 PyObject* right 954 ); 955 956 /** Concat two strings, put the result in *pleft and drop the right object 957 (sets *pleft to NULL on error) 958 Params: 959 pleft = Pointer to left string 960 */ 961 /// Availability: 3.* 962 void PyUnicode_AppendAndDel( 963 PyObject** pleft, 964 PyObject* right 965 ); 966 } 967 968 /** Split a string giving a list of Unicode strings. 969 970 If sep is NULL, splitting will be done at all whitespace 971 substrings. Otherwise, splits occur at the given separator. 972 973 At most maxsplit splits will be done. If negative, no limit is set. 974 975 Separators are not included in the resulting list. 976 977 */ 978 PyObject* PyUnicode_Split( 979 PyObject* s, 980 PyObject* sep, 981 Py_ssize_t maxsplit); 982 983 /** Ditto PyUnicode_Split, but split at line breaks. 984 985 CRLF is considered to be one line break. Line breaks are not 986 included in the resulting list. */ 987 PyObject* PyUnicode_Splitlines( 988 PyObject* s, 989 int keepends); 990 991 version(Python_2_5_Or_Later) { 992 /** Partition a string using a given separator. */ 993 /// Availability: >= 2.5 994 PyObject* PyUnicode_Partition( 995 PyObject* s, 996 PyObject* sep 997 ); 998 999 /** Partition a string using a given separator, searching from the end 1000 of the string. */ 1001 1002 PyObject* PyUnicode_RPartition( 1003 PyObject* s, 1004 PyObject* sep 1005 ); 1006 } 1007 1008 /** Split a string giving a list of Unicode strings. 1009 1010 If sep is NULL, splitting will be done at all whitespace 1011 substrings. Otherwise, splits occur at the given separator. 1012 1013 At most maxsplit splits will be done. 
But unlike PyUnicode_Split 1014 PyUnicode_RSplit splits from the end of the string. If negative, 1015 no limit is set. 1016 1017 Separators are not included in the resulting list. 1018 1019 */ 1020 PyObject* PyUnicode_RSplit( 1021 PyObject* s, 1022 PyObject* sep, 1023 Py_ssize_t maxsplit); 1024 1025 /** Translate a string by applying a character mapping table to it and 1026 return the resulting Unicode object. 1027 1028 The mapping table must map Unicode ordinal integers to Unicode 1029 ordinal integers or None (causing deletion of the character). 1030 1031 Mapping tables may be dictionaries or sequences. Unmapped character 1032 ordinals (ones which cause a LookupError) are left untouched and 1033 are copied as-is. 1034 1035 */ 1036 PyObject* PyUnicode_Translate( 1037 PyObject* str, 1038 PyObject* table, 1039 const(char)* errors); 1040 1041 /** Join a sequence of strings using the given separator and return 1042 the resulting Unicode string. */ 1043 PyObject* PyUnicode_Join( 1044 PyObject* separator, 1045 PyObject* seq); 1046 1047 /** Return 1 if substr matches str[start:end] at the given tail end, 0 1048 otherwise. */ 1049 Py_ssize_t PyUnicode_Tailmatch( 1050 PyObject* str, 1051 PyObject* substr, 1052 Py_ssize_t start, 1053 Py_ssize_t end, 1054 int direction 1055 ); 1056 1057 /** Return the first position of substr in str[start:end] using the 1058 given search direction or -1 if not found. -2 is returned in case 1059 an error occurred and an exception is set. */ 1060 Py_ssize_t PyUnicode_Find( 1061 PyObject* str, 1062 PyObject* substr, 1063 Py_ssize_t start, 1064 Py_ssize_t end, 1065 int direction 1066 ); 1067 1068 /** Count the number of occurrences of substr in str[start:end]. */ 1069 Py_ssize_t PyUnicode_Count( 1070 PyObject* str, 1071 PyObject* substr, 1072 Py_ssize_t start, 1073 Py_ssize_t end); 1074 1075 /** Replace at most maxcount occurrences of substr in str with replstr 1076 and return the resulting Unicode object. 
*/ 1077 PyObject* PyUnicode_Replace( 1078 PyObject* str, 1079 PyObject* substr, 1080 PyObject* replstr, 1081 Py_ssize_t maxcount 1082 ); 1083 1084 /** Compare two strings and return -1, 0, 1 for less than, equal, 1085 greater than resp. */ 1086 int PyUnicode_Compare(PyObject* left, PyObject* right); 1087 version(Python_3_0_Or_Later) { 1088 /** Compare two strings and return -1, 0, 1 for less than, equal, 1089 greater than resp. 1090 Params: 1091 left = 1092 right = ASCII-encoded string 1093 */ 1094 /// Availability: 3.* 1095 int PyUnicode_CompareWithASCIIString( 1096 PyObject* left, 1097 const(char)* right 1098 ); 1099 } 1100 1101 version(Python_2_5_Or_Later) { 1102 /** Rich compare two strings and return one of the following: 1103 1104 - NULL in case an exception was raised 1105 - Py_True or Py_False for successfuly comparisons 1106 - Py_NotImplemented in case the type combination is unknown 1107 1108 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 1109 case the conversion of the arguments to Unicode fails with a 1110 UnicodeDecodeError. 1111 1112 Possible values for op: 1113 1114 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 1115 1116 */ 1117 /// Availability: >= 2.5 1118 PyObject* PyUnicode_RichCompare( 1119 PyObject* left, 1120 PyObject* right, 1121 int op 1122 ); 1123 } 1124 1125 /** Apply a argument tuple or dictionary to a format string and return 1126 the resulting Unicode string. */ 1127 PyObject* PyUnicode_Format(PyObject* format, PyObject* args); 1128 1129 /** Checks whether element is contained in container and return 1/0 1130 accordingly. 1131 1132 element has to coerce to an one element Unicode string. -1 is 1133 returned in case of an error. */ 1134 int PyUnicode_Contains(PyObject* container, PyObject* element); 1135 1136 version(Python_3_0_Or_Later) { 1137 /** Checks whether argument is a valid identifier. 
*/
    /// Availability: 3.*
    int PyUnicode_IsIdentifier(PyObject* s);
}

/// _
int _PyUnicode_IsLowercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsUppercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsTitlecase(Py_UNICODE ch);
/// _
int _PyUnicode_IsWhitespace(Py_UNICODE ch);
/// _
int _PyUnicode_IsLinebreak(Py_UNICODE ch);

/// _
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch);

/// _
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_ToDigit(Py_UNICODE ch);
/// _
double _PyUnicode_ToNumeric(Py_UNICODE ch);

/// _
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsNumeric(Py_UNICODE ch);
/// _
int _PyUnicode_IsAlpha(Py_UNICODE ch);

};

/*
pragma(msg,substitute_and_alias(unicode_funs));
mixin(substitute_and_alias(unicode_funs));
*/

// Calling substitute_and_alias here breaks linking, so the mixin above
// stays disabled (the pasted form is probably faster anyway).
// The code that follows is generated by substitute_and_alias.
// Do not modify it; modify unicode_funs instead.
version(Python_3_3_Or_Later) {
version(Python_2_6_Or_Later) {

/** Create a Unicode Object from the Py_UNICODE buffer u of the given
    size.

    u may be NULL which causes the contents to be undefined. It is the
    user's responsibility to fill in the needed data afterwards. Note
    that modifying the Unicode object contents after construction is
    only allowed if u was set to NULL.

    The buffer is copied into the new object.
*/
/// Availability: >= 2.6
PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size);

/** Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded
    bytes.

    (This comment used to say Latin-1.  This branch is only compiled for
    Python >= 3.3, where the C API documents the buffer as UTF-8.)
    Params:
    u = char buffer
    size = size of buffer
*/
/// Availability: >= 2.6
PyObject* PyUnicode_FromStringAndSize(const(char)*u, Py_ssize_t size);

/** Similar to PyUnicode_FromUnicode(), but u points to null-terminated
    UTF-8 encoded bytes (documented as Latin-1 here before; see
    PyUnicode_FromStringAndSize).
    Params:
    u = string
*/
/// Availability: >= 2.6
PyObject* PyUnicode_FromString(const(char)*u);

/// Availability: >= 2.6
PyObject* PyUnicode_FromFormatV(const(char)*, va_list);

/// Availability: >= 2.6
PyObject* PyUnicode_FromFormat(const(char)*, ...);

/** Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting). */
/// Availability: >= 2.6
PyObject* _PyUnicode_FormatAdvanced(
    PyObject *obj, Py_UNICODE *format_spec, Py_ssize_t format_spec_len);

/// Availability: >= 2.6
int PyUnicode_ClearFreeList();

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicode_DecodeUTF7Stateful(
    const(char)* string, Py_ssize_t length,
    const(char)*errors, Py_ssize_t *consumed);

/**
Params:
_string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
*/
/// Availability: >= 2.6
PyObject* PyUnicode_DecodeUTF32(
    const(char)* _string, Py_ssize_t length,
    const(char)*errors, int *byteorder);

/**
Params:
_string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use
0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicode_DecodeUTF32Stateful(
    const(char)*_string, Py_ssize_t length,
    const(char)*errors, int *byteorder, Py_ssize_t *consumed);

/** Return a Python string holding the UTF-32 encoding, in native byte
    order, of the Unicode object.  The string always starts with a BOM
    mark. */
/// Availability: >= 2.6
PyObject* PyUnicode_AsUTF32String(PyObject *unicode);

/** Encode a Py_UNICODE buffer as UTF-32 and return the result as a
    Python string object.

    byteorder selects the byte order of the output:

      byteorder == -1: little endian
      byteorder == 0:  native byte order (writes a BOM mark)
      byteorder == 1:  big endian

    With byteorder 0 the output string always starts with the Unicode
    BOM mark (U+FEFF).  In the other two modes no BOM mark is
    prepended.
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    errors = error handling
    byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
*/
/// Availability: >= 2.6
PyObject* PyUnicode_EncodeUTF32(
    const Py_UNICODE *data, Py_ssize_t length,
    const(char)* errors, int byteorder);

}

/** Return a read-only pointer to the internal Py_UNICODE buffer of the
    Unicode object. */
Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode);

/** Return the length of the Unicode object. */
Py_ssize_t PyUnicode_GetSize(PyObject* unicode);

/** Return the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicode_GetMax();

/** Resize an already allocated Unicode object to the new size length.

   _*unicode is modified to point to the new (resized) object and 0
   returned on success.
1329 1330 This API may only be called by the function which also called the 1331 Unicode constructor. The refcount on the object must be 1. Otherwise, 1332 an error is returned. 1333 1334 Error handling is implemented as follows: an exception is set, -1 1335 is returned and *unicode left untouched. 1336 Params: 1337 unicode = pointer to the new unicode object. 1338 length = New length. 1339 1340 */ 1341 int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length); 1342 1343 /** Coerce obj to an Unicode object and return a reference with 1344 _*incremented* refcount. 1345 1346 Coercion is done in the following way: 1347 1348 1. String and other char buffer compatible objects are decoded 1349 under the assumptions that they contain data using the current 1350 default encoding. Decoding is done in "strict" mode. 1351 1352 2. All other objects (including Unicode objects) raise an 1353 exception. 1354 1355 The API returns NULL in case of an error. The caller is responsible 1356 for decref'ing the returned objects. 1357 1358 */ 1359 PyObject* PyUnicode_FromEncodedObject( 1360 PyObject* obj, 1361 const(char)* encoding, 1362 const(char)* errors); 1363 1364 /** Coerce obj to an Unicode object and return a reference with 1365 _*incremented* refcount. 1366 1367 Unicode objects are passed back as-is (subclasses are converted to 1368 true Unicode objects), all other objects are delegated to 1369 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 1370 using the default encoding as basis for decoding the object. 1371 1372 The API returns NULL in case of an error. The caller is responsible 1373 for decref'ing the returned objects. 1374 1375 */ 1376 PyObject* PyUnicode_FromObject(PyObject* obj); 1377 1378 /** Create a Unicode Object from the whcar_t buffer w of the given 1379 size. 1380 1381 The buffer is copied into the new object. 
*/
// w is declared with the C wchar_t (core.stdc.stddef), not D's wchar:
// D's wchar is always a 16-bit code unit, while C's wchar_t is not 16-bit
// on every platform.  This also matches the PyUnicodeUCS2_FromWideChar
// declaration further down in this file.
PyObject* PyUnicode_FromWideChar(const(wchar_t)* w, Py_ssize_t size);

/** Copy the contents of the Unicode Object into the wchar_t buffer w.
    At most size wchar_t characters are copied.

    The resulting wchar_t string may or may not be 0-terminated.  If
    the application requires 0-termination, the caller is responsible
    for ensuring it.

    Returns the number of wchar_t characters copied (excluding a
    possibly trailing 0-termination character) or -1 in case of an
    error.

    NOTE(review): w is the destination buffer, yet it is declared
    const here; the qualifier is kept so existing callers keep
    compiling -- confirm whether it should be dropped.
*/
Py_ssize_t PyUnicode_AsWideChar(
    PyUnicodeObject* unicode, const(wchar_t)* w, Py_ssize_t size);

/** Create a Unicode Object from the given Unicode code point ordinal.

    The ordinal must be in range(0x10000) on narrow Python builds
    (UCS2), and range(0x110000) on wide builds (UCS4).  A ValueError
    is raised if it is not.
*/
PyObject* PyUnicode_FromOrdinal(int ordinal);

/** Return a Python string holding the default encoded value of the
    Unicode object.

    The resulting string is cached in the Unicode object for later
    calls of this function.  The cached version is needed to implement
    the character buffer interface and lives (at least) as long as the
    Unicode object itself.

    The refcount of the string is *not* incremented.

    _*** Exported for internal use by the interpreter only !!! ***
*/
PyObject* _PyUnicode_AsDefaultEncodedString(PyObject *, const(char)*);

/** Returns the currently active default encoding.

    The default encoding is currently implemented as run-time settable
    process global.  This may change in future versions of the
    interpreter to become a parameter which is managed on a per-thread
    basis.
*/
const(char)* PyUnicode_GetDefaultEncoding();

/** Set the currently active default encoding.

    Returns 0 on success, -1 in case of an error.
*/
int PyUnicode_SetDefaultEncoding(const(char)*encoding);

/** Decode the encoded string s of the given size into a new Unicode
    object.
    Params:
    s = encoded string
    size = size of buffer
    encoding = encoding
    errors = error handling
*/
PyObject* PyUnicode_Decode(
    const(char)* s, Py_ssize_t size,
    const(char)* encoding, const(char)* errors);

version(Python_3_0_Or_Later) {
    /** Decode the Unicode object unicode; the result is returned as a
        Python object. */
    /// Availability: 3.*
    PyObject* PyUnicode_AsDecodedObject(
        PyObject* unicode, const(char)* encoding, const(char)* errors);

    /** Decode the Unicode object unicode; the result is returned as a
        Unicode object. */
    /// Availability: 3.*
    PyObject* PyUnicode_AsDecodedUnicode(
        PyObject* unicode, const(char)* encoding, const(char)* errors);

}

/** Encode a Py_UNICODE buffer of the given size; the result is
    returned as a Python string object.
    Params:
    s = Unicode char buffer
    size = number of Py_UNICODE chars to encode
    encoding = encoding
    errors = error handling
*/
PyObject* PyUnicode_Encode(
    Py_UNICODE* s, Py_ssize_t size,
    const(char)* encoding, const(char)* errors);

/** Encode a Unicode object; the result is returned as a Python
    object. */
PyObject* PyUnicode_AsEncodedObject(
    PyObject* unicode, const(char)* encoding, const(char)* errors);

/** Encodes a Unicode object and returns the result as Python string
    object.
*/
PyObject* PyUnicode_AsEncodedString(
    PyObject* unicode, const(char)* encoding, const(char)* errors);

version(Python_3_0_Or_Later) {
    /** Encode a Unicode object; the result is returned as a Unicode
        object. */
    /// Availability: 3.*
    PyObject* PyUnicode_AsEncodedUnicode(
        PyObject* unicode, const(char)* encoding, const(char)* errors);

}

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeUTF7(
    const(char)* string, Py_ssize_t length, const(char)* errors);

version(Python_3_8_Or_Later) {
}else{
    /**
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    base64Set0 = Encode RFC2152 Set O characters in base64
    base64WhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
    errors = error handling
    */
    // The Params entry above is spelled base64Set0 (digit zero) so that it
    // matches the parameter name actually declared below.
    /// Availability: < 3.8
    PyObject* PyUnicode_EncodeUTF7(
        Py_UNICODE* data, Py_ssize_t length,
        int base64Set0, int base64WhiteSpace,
        const(char)* errors);
}

/// _
PyObject* PyUnicode_DecodeUTF8(
    const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_DecodeUTF8Stateful(
    const(char)* string, Py_ssize_t length,
    const(char)* errors, Py_ssize_t* consumed);

/// _
PyObject* PyUnicode_AsUTF8String(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeUTF8(
    Py_UNICODE* data, Py_ssize_t length, const(char) *errors);

/** Decodes length bytes from a UTF-16 encoded buffer string and returns
    the corresponding Unicode object.

    errors (if non-NULL) defines the error handling. It defaults
    to "strict".
1579 1580 If byteorder is non-NULL, the decoder starts decoding using the 1581 given byte order: 1582 1583 *byteorder == -1: little endian 1584 *byteorder == 0: native order 1585 *byteorder == 1: big endian 1586 1587 In native mode, the first two bytes of the stream are checked for a 1588 BOM mark. If found, the BOM mark is analysed, the byte order 1589 adjusted and the BOM skipped. In the other modes, no BOM mark 1590 interpretation is done. After completion, *byteorder is set to the 1591 current byte order at the end of input data. 1592 1593 If byteorder is NULL, the codec starts in native order mode. 1594 1595 */ 1596 PyObject* PyUnicode_DecodeUTF16( 1597 const(char)* string, 1598 Py_ssize_t length, 1599 const(char)* errors, 1600 int* byteorder); 1601 1602 1603 /** 1604 Params: 1605 string = UTF-16 encoded string 1606 length = size of string 1607 errors = error handling 1608 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 1609 consumed = bytes consumed 1610 */ 1611 PyObject* PyUnicode_DecodeUTF16Stateful( 1612 const(char)* string, 1613 Py_ssize_t length, 1614 const(char)* errors, 1615 int* byteorder, 1616 Py_ssize_t* consumed 1617 ); 1618 1619 1620 /** Returns a Python string using the UTF-16 encoding in native byte 1621 order. The string always starts with a BOM mark. */ 1622 PyObject* PyUnicode_AsUTF16String(PyObject *unicode); 1623 1624 1625 /** Returns a Python string object holding the UTF-16 encoded value of 1626 the Unicode data. 1627 1628 If byteorder is not 0, output is written according to the following 1629 byte order: 1630 1631 byteorder == -1: little endian 1632 byteorder == 0: native byte order (writes a BOM mark) 1633 byteorder == 1: big endian 1634 1635 If byteorder is 0, the output string will always start with the 1636 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1637 prepended. 1638 1639 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 1640 UCS-2. 
This trick makes it possible to add full UTF-16 capabilities
    at a later point without compromising the APIs.

*/
PyObject* PyUnicode_EncodeUTF16(
    Py_UNICODE* data, Py_ssize_t length,
    const(char)* errors, int byteorder);

/// _
PyObject* PyUnicode_DecodeUnicodeEscape(
    const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_AsUnicodeEscapeString(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);

/**
Params:
string = Raw-Unicode-Escape encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeRawUnicodeEscape(
    const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);

/// _
PyObject* _PyUnicode_DecodeUnicodeInternal(
    const(char)* string, Py_ssize_t length, const(char)* errors);

/**
Params:
string = Latin-1 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeLatin1(
    const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_AsLatin1String(PyObject *unicode);

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
errors = error handling
*/
PyObject* PyUnicode_EncodeLatin1(
    Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

/**
Params:
string = ASCII encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeASCII(
1727 const(char)* string, 1728 Py_ssize_t length, 1729 const(char)* errors); 1730 1731 /// _ 1732 PyObject* PyUnicode_AsASCIIString(PyObject *unicode); 1733 1734 /** 1735 Params: 1736 data = Unicode char buffer 1737 length = Number of Py_UNICODE chars to encode 1738 errors = error handling 1739 */ 1740 PyObject* PyUnicode_EncodeASCII( 1741 Py_UNICODE* data, 1742 Py_ssize_t length, 1743 const(char)* errors); 1744 1745 /** 1746 Params: 1747 string = Encoded string 1748 length = size of string 1749 mapping = character mapping (char ordinal -> unicode ordinal) 1750 errors = error handling 1751 */ 1752 PyObject* PyUnicode_DecodeCharmap( 1753 const(char)* string, 1754 Py_ssize_t length, 1755 PyObject* mapping, 1756 const(char)* errors 1757 ); 1758 1759 /** 1760 Params: 1761 unicode = Unicode object 1762 mapping = character mapping (unicode ordinal -> char ordinal) 1763 */ 1764 PyObject* PyUnicode_AsCharmapString( 1765 PyObject* unicode, 1766 PyObject* mapping); 1767 1768 /** 1769 Params: 1770 data = Unicode char buffer 1771 length = Number of Py_UNICODE chars to encode 1772 mapping = character mapping (unicode ordinal -> char ordinal) 1773 errors = error handling 1774 */ 1775 PyObject* PyUnicode_EncodeCharmap( 1776 Py_UNICODE* data, 1777 Py_ssize_t length, 1778 PyObject* mapping, 1779 const(char)* errors 1780 ); 1781 1782 /** Translate a Py_UNICODE buffer of the given length by applying a 1783 character mapping table to it and return the resulting Unicode 1784 object. 1785 1786 The mapping table must map Unicode ordinal integers to Unicode 1787 ordinal integers or None (causing deletion of the character). 1788 1789 Mapping tables may be dictionaries or sequences. Unmapped character 1790 ordinals (ones which cause a LookupError) are left untouched and 1791 are copied as-is. 
*/
PyObject* PyUnicode_TranslateCharmap(
    Py_UNICODE* data, Py_ssize_t length,
    PyObject* table, const(char)* errors);

version (Windows) {
    /// Availability: Windows only
    PyObject* PyUnicode_DecodeMBCS(
        const(char)* string, Py_ssize_t length, const(char)* errors);

    /// Availability: Windows only
    PyObject* PyUnicode_AsMBCSString(PyObject* unicode);

    /// Availability: Windows only
    PyObject* PyUnicode_EncodeMBCS(
        Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

}
/** Convert a Unicode string holding a decimal value to standard ASCII
    digit codes, writing the result into an output buffer.

    The output buffer has to provide at least length+1 bytes of
    storage area.  The output string is 0-terminated.

    Whitespace is converted to ' ', decimal characters to their
    corresponding ASCII digit, and all other Latin-1 characters except
    \0 are copied as-is.  Characters outside this range (Unicode
    ordinals 1-256) are treated as errors.  This includes embedded
    NULL bytes.

    The errors argument selects the error handling:

    NULL or "strict": raise a ValueError
    "ignore": ignore the wrong characters (these are not copied to the
              output buffer)
    "replace": replaces illegal characters with '?'

    Returns 0 on success, -1 on failure.
*/
int PyUnicode_EncodeDecimal(
    Py_UNICODE* s, Py_ssize_t length,
    char* output, const(char)* errors);

/** Concat two strings giving a new Unicode string.
*/
PyObject* PyUnicode_Concat(PyObject* left, PyObject* right);

version(Python_3_0_Or_Later) {
    /** Concatenate two strings, storing the result in *pleft
        (*pleft is set to NULL on error).
        Params:
        pleft = Pointer to left string
        right = Right string
    */
    /// Availability: 3.*
    void PyUnicode_Append(PyObject** pleft, PyObject* right);

    /** Concatenate two strings, storing the result in *pleft and
        dropping the right object (*pleft is set to NULL on error).
        Params:
        pleft = Pointer to left string
        right = Right string; dropped by this call
    */
    /// Availability: 3.*
    void PyUnicode_AppendAndDel(PyObject** pleft, PyObject* right);

}

/** Split a string into a list of Unicode strings.

    With a NULL sep the string is split at all whitespace substrings;
    otherwise it is split at the given separator.

    No more than maxsplit splits are done.  A negative maxsplit means
    no limit.

    The separators are not part of the resulting list.
*/
PyObject* PyUnicode_Split(PyObject* s, PyObject* sep, Py_ssize_t maxsplit);

/** Like PyUnicode_Split, but the string is split at line breaks.

    CRLF counts as a single line break.  Line breaks are not included
    in the resulting list. */
PyObject* PyUnicode_Splitlines(PyObject* s, int keepends);

version(Python_2_5_Or_Later) {
    /** Partition a string using a given separator. */
    /// Availability: >= 2.5
    PyObject* PyUnicode_Partition(PyObject* s, PyObject* sep);

    /** Partition a string using a given separator, searching from the
        end of the string. */
    /// Availability: >= 2.5
    PyObject* PyUnicode_RPartition(PyObject* s, PyObject* sep);

}

/** Split a string giving a list of Unicode strings.

    If sep is NULL, splitting will be done at all whitespace
    substrings.
Otherwise, splits occur at the given separator.

    At most maxsplit splits will be done. But unlike PyUnicode_Split
    PyUnicode_RSplit splits from the end of the string. If negative,
    no limit is set.

    Separators are not included in the resulting list.

*/
PyObject* PyUnicode_RSplit(PyObject* s, PyObject* sep, Py_ssize_t maxsplit);

/** Apply a character mapping table to a string and return the
    resulting Unicode object.

    The table has to map Unicode ordinal integers either to Unicode
    ordinal integers or to None; None deletes the character.

    The table may be a dictionary or a sequence.  Ordinals without a
    mapping (those whose lookup causes a LookupError) are copied to
    the result unchanged.
*/
PyObject* PyUnicode_Translate(PyObject* str, PyObject* table, const(char)* errors);

/** Join the strings of a sequence, placing the given separator
    between them, and return the resulting Unicode string. */
PyObject* PyUnicode_Join(PyObject* separator, PyObject* seq);

/** Test whether substr matches str[start:end] at the given tail end.
    Returns 1 on a match and 0 otherwise. */
Py_ssize_t PyUnicode_Tailmatch(
    PyObject* str, PyObject* substr,
    Py_ssize_t start, Py_ssize_t end, int direction);

/** Search str[start:end] for substr using the given search direction.
    Returns the first position found, or -1 if there is none.  -2 is
    returned if an error occurred; an exception is set in that case. */
Py_ssize_t PyUnicode_Find(
    PyObject* str, PyObject* substr,
    Py_ssize_t start, Py_ssize_t end, int direction);

/** Count the number of occurrences of substr in str[start:end].
*/
Py_ssize_t PyUnicode_Count(
    PyObject* str, PyObject* substr,
    Py_ssize_t start, Py_ssize_t end);

/** Return the Unicode object obtained by replacing at most maxcount
    occurrences of substr in str with replstr. */
PyObject* PyUnicode_Replace(
    PyObject* str, PyObject* substr, PyObject* replstr, Py_ssize_t maxcount);

/** Three-way comparison of two strings: the result is -1, 0 or 1 for
    less than, equal and greater than respectively. */
int PyUnicode_Compare(PyObject* left, PyObject* right);

version(Python_3_0_Or_Later) {
    /** Three-way comparison of a string with an ASCII-encoded C string:
        the result is -1, 0 or 1 for less than, equal and greater than
        respectively.
        Params:
        left = string object to compare
        right = ASCII-encoded string
    */
    /// Availability: 3.*
    int PyUnicode_CompareWithASCIIString(PyObject* left, const(char)* right);
}

version(Python_2_5_Or_Later) {
    /** Rich comparison of two strings.  The result is one of:

        - NULL in case an exception was raised
        - Py_True or Py_False for successful comparisons
        - Py_NotImplemented in case the type combination is unknown

        Py_EQ and Py_NE comparisons can cause a UnicodeWarning when
        converting the arguments to Unicode fails with a
        UnicodeDecodeError.

        Possible values for op:

        Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
    */
    /// Availability: >= 2.5
    PyObject* PyUnicode_RichCompare(PyObject* left, PyObject* right, int op);
}

/** Apply an argument tuple or dictionary to a format string and return
    the resulting Unicode string. */
PyObject* PyUnicode_Format(PyObject* format, PyObject* args);

/** Checks whether element is contained in container and return 1/0
    accordingly.

    element has to coerce to an one element Unicode string. -1 is
    returned in case of an error.
*/
int PyUnicode_Contains(PyObject* container, PyObject* element);

version(Python_3_0_Or_Later) {
    /** Test whether the argument is a valid identifier. */
    /// Availability: 3.*
    int PyUnicode_IsIdentifier(PyObject* s);
}

/// _
int _PyUnicode_IsLowercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsUppercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsTitlecase(Py_UNICODE ch);
/// _
int _PyUnicode_IsWhitespace(Py_UNICODE ch);
/// _
int _PyUnicode_IsLinebreak(Py_UNICODE ch);

/// _
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch);

/// _
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_ToDigit(Py_UNICODE ch);
/// _
double _PyUnicode_ToNumeric(Py_UNICODE ch);

/// _
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsNumeric(Py_UNICODE ch);
/// _
int _PyUnicode_IsAlpha(Py_UNICODE ch);

}else version(Python_Unicode_UCS2) {

version(Python_2_6_Or_Later) {

/** Create a Unicode Object from the Py_UNICODE buffer u of the given
    size.

    u may be NULL which causes the contents to be undefined. It is the
    user's responsibility to fill in the needed data afterwards. Note
    that modifying the Unicode object contents after construction is
    only allowed if u was set to NULL.

    The buffer is copied into the new object.
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromUnicode(Py_UNICODE* u, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_FromUnicode PyUnicode_FromUnicode;

/** Like PyUnicode_FromUnicode(), except that u points to Latin-1
    encoded bytes.

    NOTE(review): the CPython documentation describes this buffer as
    UTF-8 rather than Latin-1 -- confirm and correct if so.
    Params:
    u = char buffer
    size = size of buffer
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromStringAndSize(const(char)*u, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_FromStringAndSize PyUnicode_FromStringAndSize;

/** Like PyUnicode_FromUnicode(), except that u points to
    null-terminated Latin-1 encoded bytes (see the review note on
    PyUnicodeUCS2_FromStringAndSize about the encoding).
    Params:
    u = string
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromString(const(char)*u);
/// ditto
alias PyUnicodeUCS2_FromString PyUnicode_FromString;

/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromFormatV(const(char)*, va_list);
/// ditto
alias PyUnicodeUCS2_FromFormatV PyUnicode_FromFormatV;

/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromFormat(const(char)*, ...);
/// ditto
alias PyUnicodeUCS2_FromFormat PyUnicode_FromFormat;

/** Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting).
*/
/// Availability: >= 2.6
PyObject* _PyUnicodeUCS2_FormatAdvanced(
    PyObject *obj, Py_UNICODE *format_spec, Py_ssize_t format_spec_len);
/// ditto
alias _PyUnicodeUCS2_FormatAdvanced _PyUnicode_FormatAdvanced;

/// Availability: >= 2.6
int PyUnicodeUCS2_ClearFreeList();
/// ditto
alias PyUnicodeUCS2_ClearFreeList PyUnicode_ClearFreeList;

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF7Stateful(
    const(char)* string, Py_ssize_t length,
    const(char)*errors, Py_ssize_t *consumed);
/// ditto
alias PyUnicodeUCS2_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful;

/**
Params:
string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF32(
    const(char)* string, Py_ssize_t length,
    const(char)*errors, int *byteorder);
/// ditto
alias PyUnicodeUCS2_DecodeUTF32 PyUnicode_DecodeUTF32;

/**
Params:
string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF32Stateful(
    const(char)*string, Py_ssize_t length,
    const(char)*errors, int *byteorder, Py_ssize_t *consumed);
/// ditto
alias PyUnicodeUCS2_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful;

/** Returns a Python string using the UTF-32 encoding in native byte
    order. The string always starts with a BOM mark.
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_AsUTF32String(PyObject *unicode);
/// ditto
alias PyUnicodeUCS2_AsUTF32String PyUnicode_AsUTF32String;

/** Encode a Py_UNICODE buffer as UTF-32 and return the result as a
    Python string object.

    byteorder selects the byte order of the output:

      byteorder == -1: little endian
      byteorder == 0:  native byte order (writes a BOM mark)
      byteorder == 1:  big endian

    With byteorder 0 the output string always starts with the Unicode
    BOM mark (U+FEFF).  In the other two modes no BOM mark is
    prepended.
    Params:
    data = Unicode char buffer
    length = number of Py_UNICODE chars to encode
    errors = error handling
    byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_EncodeUTF32(
    const Py_UNICODE *data, Py_ssize_t length,
    const(char)* errors, int byteorder);
/// ditto
alias PyUnicodeUCS2_EncodeUTF32 PyUnicode_EncodeUTF32;

}

/** Return a read-only pointer to the internal Py_UNICODE buffer of the
    Unicode object. */
Py_UNICODE* PyUnicodeUCS2_AsUnicode(PyObject* unicode);
/// ditto
alias PyUnicodeUCS2_AsUnicode PyUnicode_AsUnicode;

/** Return the length of the Unicode object. */
Py_ssize_t PyUnicodeUCS2_GetSize(PyObject* unicode);
/// ditto
alias PyUnicodeUCS2_GetSize PyUnicode_GetSize;

/** Return the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicodeUCS2_GetMax();
/// ditto
alias PyUnicodeUCS2_GetMax PyUnicode_GetMax;

/** Resize an already allocated Unicode object to the new size length.

   _*unicode is modified to point to the new (resized) object and 0
   returned on success.
2299 2300 This API may only be called by the function which also called the 2301 Unicode constructor. The refcount on the object must be 1. Otherwise, 2302 an error is returned. 2303 2304 Error handling is implemented as follows: an exception is set, -1 2305 is returned and *unicode left untouched. 2306 Params: 2307 unicode = pointer to the new unicode object. 2308 length = New length. 2309 2310 */ 2311 int PyUnicodeUCS2_Resize(PyObject** unicode, Py_ssize_t length); 2312 /// ditto 2313 2314 alias PyUnicodeUCS2_Resize PyUnicode_Resize; 2315 2316 /** Coerce obj to an Unicode object and return a reference with 2317 _*incremented* refcount. 2318 2319 Coercion is done in the following way: 2320 2321 1. String and other char buffer compatible objects are decoded 2322 under the assumptions that they contain data using the current 2323 default encoding. Decoding is done in "strict" mode. 2324 2325 2. All other objects (including Unicode objects) raise an 2326 exception. 2327 2328 The API returns NULL in case of an error. The caller is responsible 2329 for decref'ing the returned objects. 2330 2331 */ 2332 PyObject* PyUnicodeUCS2_FromEncodedObject( 2333 PyObject* obj, 2334 const(char)* encoding, 2335 const(char)* errors); 2336 /// ditto 2337 2338 alias PyUnicodeUCS2_FromEncodedObject PyUnicode_FromEncodedObject; 2339 2340 2341 /** Coerce obj to an Unicode object and return a reference with 2342 _*incremented* refcount. 2343 2344 Unicode objects are passed back as-is (subclasses are converted to 2345 true Unicode objects), all other objects are delegated to 2346 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 2347 using the default encoding as basis for decoding the object. 2348 2349 The API returns NULL in case of an error. The caller is responsible 2350 for decref'ing the returned objects. 
2351 2352 */ 2353 PyObject* PyUnicodeUCS2_FromObject(PyObject* obj); 2354 /// ditto 2355 2356 alias PyUnicodeUCS2_FromObject PyUnicode_FromObject; 2357 2358 2359 /** Create a Unicode Object from the wchar_t buffer w of the given 2360 size. 2361 2362 The buffer is copied into the new object. */ 2363 PyObject* PyUnicodeUCS2_FromWideChar(const(wchar_t)* w, Py_ssize_t size); 2364 /// ditto 2365 2366 alias PyUnicodeUCS2_FromWideChar PyUnicode_FromWideChar; 2367 2368 2369 /** Copies the Unicode Object contents into the wchar_t buffer w. At 2370 most size wchar_t characters are copied. 2371 2372 Note that the resulting wchar_t string may or may not be 2373 0-terminated. It is the responsibility of the caller to make sure 2374 that the wchar_t string is 0-terminated in case this is required by 2375 the application. 2376 2377 Returns the number of wchar_t characters copied (excluding a 2378 possibly trailing 0-termination character) or -1 in case of an 2379 error. NOTE(review): w receives the copied characters yet is declared const(wchar_t)* here -- confirm against the C prototype. */ 2380 Py_ssize_t PyUnicodeUCS2_AsWideChar( 2381 PyUnicodeObject* unicode, 2382 const(wchar_t)* w, 2383 Py_ssize_t size); 2384 /// ditto 2385 2386 alias PyUnicodeUCS2_AsWideChar PyUnicode_AsWideChar; 2387 2388 2389 /** Create a Unicode Object from the given Unicode code point ordinal. 2390 2391 The ordinal must be in range(0x10000) on narrow Python builds 2392 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 2393 raised in case it is not. 2394 2395 */ 2396 PyObject* PyUnicodeUCS2_FromOrdinal(int ordinal); 2397 /// ditto 2398 2399 alias PyUnicodeUCS2_FromOrdinal PyUnicode_FromOrdinal; 2400 2401 2402 /** Return a Python string holding the default encoded value of the 2403 Unicode object. 2404 2405 The resulting string is cached in the Unicode object for subsequent 2406 usage by this function. The cached version is needed to implement 2407 the character buffer interface and will live (at least) as long as 2408 the Unicode object itself. 2409 2410 The refcount of the string is *not* incremented.
2411 2412 _*** Exported for internal use by the interpreter only !!! *** 2413 2414 */ 2415 PyObject* _PyUnicodeUCS2_AsDefaultEncodedString(PyObject *, const(char)*); 2416 /// ditto 2417 2418 alias _PyUnicodeUCS2_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 2419 2420 2421 /** Returns the currently active default encoding. 2422 2423 The default encoding is currently implemented as run-time settable 2424 process global. This may change in future versions of the 2425 interpreter to become a parameter which is managed on a per-thread 2426 basis. 2427 2428 */ 2429 const(char)* PyUnicodeUCS2_GetDefaultEncoding(); 2430 /// ditto 2431 2432 alias PyUnicodeUCS2_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 2433 2434 2435 /** Sets the currently active default encoding. 2436 2437 Returns 0 on success, -1 in case of an error. 2438 2439 */ 2440 int PyUnicodeUCS2_SetDefaultEncoding(const(char)*encoding); 2441 /// ditto 2442 2443 alias PyUnicodeUCS2_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 2444 2445 2446 /** Create a Unicode object by decoding the encoded string s of the 2447 given size. 2448 Params: 2449 s = encoded string 2450 size = size of buffer 2451 encoding = encoding 2452 errors = error handling 2453 */ 2454 PyObject* PyUnicodeUCS2_Decode( 2455 const(char)* s, 2456 Py_ssize_t size, 2457 const(char)* encoding, 2458 const(char)* errors); 2459 /// ditto 2460 2461 alias PyUnicodeUCS2_Decode PyUnicode_Decode; 2462 2463 2464 version(Python_3_0_Or_Later) { 2465 /** Decode a Unicode object unicode and return the result as Python 2466 object. */ 2467 /// Availability: 3.* 2468 2469 PyObject* PyUnicodeUCS2_AsDecodedObject( 2470 PyObject* unicode, 2471 const(char)* encoding, 2472 const(char)* errors 2473 ); 2474 /// ditto 2475 2476 alias PyUnicodeUCS2_AsDecodedObject PyUnicode_AsDecodedObject; 2477 2478 /** Decode a Unicode object unicode and return the result as Unicode 2479 object. 
*/ 2480 /// Availability: 3.* 2481 2482 PyObject* PyUnicodeUCS2_AsDecodedUnicode( 2483 PyObject* unicode, 2484 const(char)* encoding, 2485 const(char)* errors 2486 ); 2487 /// ditto 2488 2489 alias PyUnicodeUCS2_AsDecodedUnicode PyUnicode_AsDecodedUnicode; 2490 2491 } 2492 2493 /** Encodes a Py_UNICODE buffer of the given size and returns a 2494 Python string object. 2495 Params: 2496 s = Unicode char buffer 2497 size = number of Py_UNICODE chars to encode 2498 encoding = encoding 2499 errors = error handling 2500 */ 2501 PyObject* PyUnicodeUCS2_Encode( 2502 Py_UNICODE* s, 2503 Py_ssize_t size, 2504 const(char)* encoding, 2505 const(char)* errors); 2506 /// ditto 2507 2508 alias PyUnicodeUCS2_Encode PyUnicode_Encode; 2509 2510 2511 /** Encodes a Unicode object and returns the result as Python object. 2512 */ 2513 PyObject* PyUnicodeUCS2_AsEncodedObject( 2514 PyObject* unicode, 2515 const(char)* encoding, 2516 const(char)* errors); 2517 /// ditto 2518 2519 alias PyUnicodeUCS2_AsEncodedObject PyUnicode_AsEncodedObject; 2520 2521 2522 /** Encodes a Unicode object and returns the result as Python string 2523 object. */ 2524 PyObject* PyUnicodeUCS2_AsEncodedString( 2525 PyObject* unicode, 2526 const(char)* encoding, 2527 const(char)* errors); 2528 /// ditto 2529 2530 alias PyUnicodeUCS2_AsEncodedString PyUnicode_AsEncodedString; 2531 2532 2533 version(Python_3_0_Or_Later) { 2534 /** Encodes a Unicode object and returns the result as Unicode 2535 object. 
*/ 2536 /// Availability: 3.* 2537 PyObject* PyUnicodeUCS2_AsEncodedUnicode( 2538 PyObject* unicode, 2539 const(char)* encoding, 2540 const(char)* errors 2541 ); 2542 /// ditto 2543 2544 alias PyUnicodeUCS2_AsEncodedUnicode PyUnicode_AsEncodedUnicode; 2545 2546 } 2547 2548 /** 2549 Params: 2550 string = UTF-7 encoded string 2551 length = size of string 2552 errors = error handling 2553 */ 2554 PyObject* PyUnicodeUCS2_DecodeUTF7( 2555 const(char)* string, 2556 Py_ssize_t length, 2557 const(char)* errors); 2558 /// ditto 2559 2560 alias PyUnicodeUCS2_DecodeUTF7 PyUnicode_DecodeUTF7; 2561 2562 2563 /** 2564 Params: 2565 data = Unicode char buffer 2566 length = number of Py_UNICODE chars to encode 2567 encodeSetO = Encode RFC2152 Set O characters in base64 2568 encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 2569 errors = error handling 2570 */ 2571 PyObject* PyUnicodeUCS2_EncodeUTF7( 2572 Py_UNICODE* data, 2573 Py_ssize_t length, 2574 int encodeSetO, 2575 int encodeWhiteSpace, 2576 const(char)* errors 2577 ); 2578 /// ditto 2579 2580 alias PyUnicodeUCS2_EncodeUTF7 PyUnicode_EncodeUTF7; 2581 2582 2583 /// _ 2584 PyObject* PyUnicodeUCS2_DecodeUTF8( 2585 const(char)* string, 2586 Py_ssize_t length, 2587 const(char)* errors); 2588 /// ditto 2589 2590 alias PyUnicodeUCS2_DecodeUTF8 PyUnicode_DecodeUTF8; 2591 2592 /// _ 2593 PyObject* PyUnicodeUCS2_DecodeUTF8Stateful( 2594 const(char)* string, 2595 Py_ssize_t length, 2596 const(char)* errors, 2597 Py_ssize_t* consumed 2598 ); 2599 /// ditto 2600 2601 alias PyUnicodeUCS2_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful; 2602 2603 /// _ 2604 PyObject* PyUnicodeUCS2_AsUTF8String(PyObject* unicode); 2605 /// ditto 2606 2607 alias PyUnicodeUCS2_AsUTF8String PyUnicode_AsUTF8String; 2608 2609 /// _ 2610 PyObject* PyUnicodeUCS2_EncodeUTF8( 2611 Py_UNICODE* data, 2612 Py_ssize_t length, 2613 const(char) *errors); 2614 /// ditto 2615 2616 alias PyUnicodeUCS2_EncodeUTF8 PyUnicode_EncodeUTF8; 2617 2618 2619 /**
Decodes length bytes from a UTF-16 encoded buffer string and returns 2620 the corresponding Unicode object. 2621 2622 errors (if non-NULL) defines the error handling. It defaults 2623 to "strict". 2624 2625 If byteorder is non-NULL, the decoder starts decoding using the 2626 given byte order: 2627 2628 *byteorder == -1: little endian 2629 *byteorder == 0: native order 2630 *byteorder == 1: big endian 2631 2632 In native mode, the first two bytes of the stream are checked for a 2633 BOM mark. If found, the BOM mark is analysed, the byte order 2634 adjusted and the BOM skipped. In the other modes, no BOM mark 2635 interpretation is done. After completion, *byteorder is set to the 2636 current byte order at the end of input data. 2637 2638 If byteorder is NULL, the codec starts in native order mode. 2639 2640 */ 2641 PyObject* PyUnicodeUCS2_DecodeUTF16( 2642 const(char)* string, 2643 Py_ssize_t length, 2644 const(char)* errors, 2645 int* byteorder); 2646 /// ditto 2647 2648 alias PyUnicodeUCS2_DecodeUTF16 PyUnicode_DecodeUTF16; 2649 2650 /** 2651 Params: 2652 string = UTF-16 encoded string 2653 length = size of string 2654 errors = error handling 2655 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 2656 consumed = bytes consumed 2657 */ 2658 PyObject* PyUnicodeUCS2_DecodeUTF16Stateful( 2659 const(char)* string, 2660 Py_ssize_t length, 2661 const(char)* errors, 2662 int* byteorder, 2663 Py_ssize_t* consumed 2664 ); 2665 /// ditto 2666 2667 alias PyUnicodeUCS2_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful; 2668 2669 /** Returns a Python string using the UTF-16 encoding in native byte 2670 order. The string always starts with a BOM mark. */ 2671 PyObject* PyUnicodeUCS2_AsUTF16String(PyObject *unicode); 2672 /// ditto 2673 2674 alias PyUnicodeUCS2_AsUTF16String PyUnicode_AsUTF16String; 2675 2676 /** Returns a Python string object holding the UTF-16 encoded value of 2677 the Unicode data. 
2678 2679 If byteorder is not 0, output is written according to the following 2680 byte order: 2681 2682 byteorder == -1: little endian 2683 byteorder == 0: native byte order (writes a BOM mark) 2684 byteorder == 1: big endian 2685 2686 If byteorder is 0, the output string will always start with the 2687 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 2688 prepended. 2689 2690 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 2691 UCS-2. This trick makes it possible to add full UTF-16 capabilities 2692 at a later point without compromising the APIs. 2693 2694 */ 2695 PyObject* PyUnicodeUCS2_EncodeUTF16( 2696 Py_UNICODE* data, 2697 Py_ssize_t length, 2698 const(char)* errors, 2699 int byteorder 2700 ); 2701 /// ditto 2702 2703 alias PyUnicodeUCS2_EncodeUTF16 PyUnicode_EncodeUTF16; 2704 2705 2706 /// _ 2707 PyObject* PyUnicodeUCS2_DecodeUnicodeEscape( 2708 const(char)* string, 2709 Py_ssize_t length, 2710 const(char)* errors); 2711 /// ditto 2712 2713 alias PyUnicodeUCS2_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 2714 2715 /// _ 2716 PyObject* PyUnicodeUCS2_AsUnicodeEscapeString( 2717 PyObject* unicode); 2718 /// ditto 2719 2720 alias PyUnicodeUCS2_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 2721 2722 /// _ 2723 PyObject* PyUnicodeUCS2_EncodeUnicodeEscape( 2724 Py_UNICODE* data, 2725 Py_ssize_t length); 2726 /// ditto 2727 2728 alias PyUnicodeUCS2_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 2729 2730 /** 2731 Params: 2732 string = Raw-Unicode-Escape encoded string 2733 length = size of string 2734 errors = error handling 2735 */ 2736 PyObject* PyUnicodeUCS2_DecodeRawUnicodeEscape( 2737 const(char)* string, 2738 Py_ssize_t length, 2739 const(char)* errors); 2740 /// ditto 2741 2742 alias PyUnicodeUCS2_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 2743 2744 /// _ 2745 PyObject* PyUnicodeUCS2_AsRawUnicodeEscapeString(PyObject* unicode); 2746 /// ditto 2747 2748 alias 
PyUnicodeUCS2_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 2749 2750 /// _ 2751 PyObject* PyUnicodeUCS2_EncodeRawUnicodeEscape( 2752 Py_UNICODE* data, Py_ssize_t length); 2753 /// ditto 2754 2755 alias PyUnicodeUCS2_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 2756 2757 2758 /// _ 2759 PyObject* _PyUnicodeUCS2_DecodeUnicodeInternal( 2760 const(char)* string, 2761 Py_ssize_t length, 2762 const(char)* errors); 2763 /// ditto 2764 2765 alias _PyUnicodeUCS2_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 2766 2767 2768 /** 2769 Params: 2770 string = Latin-1 encoded string 2771 length = size of string 2772 errors = error handling 2773 */ 2774 PyObject* PyUnicodeUCS2_DecodeLatin1( 2775 const(char)* string, 2776 Py_ssize_t length, 2777 const(char)* errors); 2778 /// ditto 2779 2780 alias PyUnicodeUCS2_DecodeLatin1 PyUnicode_DecodeLatin1; 2781 2782 /// _ 2783 PyObject* PyUnicodeUCS2_AsLatin1String(PyObject *unicode); 2784 /// ditto 2785 2786 alias PyUnicodeUCS2_AsLatin1String PyUnicode_AsLatin1String; 2787 2788 /** 2789 Params: 2790 data = Unicode char buffer 2791 length = Number of Py_UNICODE chars to encode 2792 errors = error handling 2793 */ 2794 PyObject* PyUnicodeUCS2_EncodeLatin1( 2795 Py_UNICODE* data, 2796 Py_ssize_t length, 2797 const(char)* errors); 2798 /// ditto 2799 2800 alias PyUnicodeUCS2_EncodeLatin1 PyUnicode_EncodeLatin1; 2801 2802 2803 /** 2804 Params: 2805 string = ASCII encoded string 2806 length = size of string 2807 errors = error handling 2808 */ 2809 PyObject* PyUnicodeUCS2_DecodeASCII( 2810 const(char)* string, 2811 Py_ssize_t length, 2812 const(char)* errors); 2813 /// ditto 2814 2815 alias PyUnicodeUCS2_DecodeASCII PyUnicode_DecodeASCII; 2816 2817 /// _ 2818 PyObject* PyUnicodeUCS2_AsASCIIString(PyObject *unicode); 2819 /// ditto 2820 2821 alias PyUnicodeUCS2_AsASCIIString PyUnicode_AsASCIIString; 2822 2823 /** 2824 Params: 2825 data = Unicode char buffer 2826 length = Number of Py_UNICODE
chars to encode 2827 errors = error handling 2828 */ 2829 PyObject* PyUnicodeUCS2_EncodeASCII( 2830 Py_UNICODE* data, 2831 Py_ssize_t length, 2832 const(char)* errors); 2833 /// ditto 2834 2835 alias PyUnicodeUCS2_EncodeASCII PyUnicode_EncodeASCII; 2836 2837 2838 /** 2839 Params: 2840 string = Encoded string 2841 length = size of string 2842 mapping = character mapping (char ordinal -> unicode ordinal) 2843 errors = error handling 2844 */ 2845 PyObject* PyUnicodeUCS2_DecodeCharmap( 2846 const(char)* string, 2847 Py_ssize_t length, 2848 PyObject* mapping, 2849 const(char)* errors 2850 ); 2851 /// ditto 2852 2853 alias PyUnicodeUCS2_DecodeCharmap PyUnicode_DecodeCharmap; 2854 2855 /** 2856 Params: 2857 unicode = Unicode object 2858 mapping = character mapping (unicode ordinal -> char ordinal) 2859 */ 2860 PyObject* PyUnicodeUCS2_AsCharmapString( 2861 PyObject* unicode, 2862 PyObject* mapping); 2863 /// ditto 2864 2865 alias PyUnicodeUCS2_AsCharmapString PyUnicode_AsCharmapString; 2866 2867 /** 2868 Params: 2869 data = Unicode char buffer 2870 length = Number of Py_UNICODE chars to encode 2871 mapping = character mapping (unicode ordinal -> char ordinal) 2872 errors = error handling 2873 */ 2874 PyObject* PyUnicodeUCS2_EncodeCharmap( 2875 Py_UNICODE* data, 2876 Py_ssize_t length, 2877 PyObject* mapping, 2878 const(char)* errors 2879 ); 2880 /// ditto 2881 2882 alias PyUnicodeUCS2_EncodeCharmap PyUnicode_EncodeCharmap; 2883 2884 /** Translate a Py_UNICODE buffer of the given length by applying a 2885 character mapping table to it and return the resulting Unicode 2886 object. 2887 2888 The mapping table must map Unicode ordinal integers to Unicode 2889 ordinal integers or None (causing deletion of the character). 2890 2891 Mapping tables may be dictionaries or sequences. Unmapped character 2892 ordinals (ones which cause a LookupError) are left untouched and 2893 are copied as-is. 
2894 2895 */ 2896 PyObject* PyUnicodeUCS2_TranslateCharmap( 2897 Py_UNICODE* data, 2898 Py_ssize_t length, 2899 PyObject* table, 2900 const(char)* errors 2901 ); 2902 /// ditto 2903 2904 alias PyUnicodeUCS2_TranslateCharmap PyUnicode_TranslateCharmap; 2905 2906 2907 version (Windows) { 2908 /// Availability: Windows only 2909 PyObject* PyUnicodeUCS2_DecodeMBCS( 2910 const(char)* string, 2911 Py_ssize_t length, 2912 const(char)* errors); 2913 /// ditto 2914 2915 alias PyUnicodeUCS2_DecodeMBCS PyUnicode_DecodeMBCS; 2916 2917 /// Availability: Windows only 2918 PyObject* PyUnicodeUCS2_AsMBCSString(PyObject* unicode); 2919 /// ditto 2920 2921 alias PyUnicodeUCS2_AsMBCSString PyUnicode_AsMBCSString; 2922 2923 /// Availability: Windows only 2924 PyObject* PyUnicodeUCS2_EncodeMBCS( 2925 Py_UNICODE* data, 2926 Py_ssize_t length, 2927 const(char)* errors); 2928 /// ditto 2929 2930 alias PyUnicodeUCS2_EncodeMBCS PyUnicode_EncodeMBCS; 2931 2932 } 2933 /** Takes a Unicode string holding a decimal value and writes it into 2934 an output buffer using standard ASCII digit codes. 2935 2936 The output buffer has to provide at least length+1 bytes of storage 2937 area. The output string is 0-terminated. 2938 2939 The encoder converts whitespace to ' ', decimal characters to their 2940 corresponding ASCII digit and all other Latin-1 characters except 2941 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 2942 are treated as errors. This includes embedded NULL bytes. 2943 2944 Error handling is defined by the errors argument: 2945 2946 NULL or "strict": raise a ValueError 2947 "ignore": ignore the wrong characters (these are not copied to the 2948 output buffer) 2949 "replace": replaces illegal characters with '?' 2950 2951 Returns 0 on success, -1 on failure. 
2952 2953 */ 2954 int PyUnicodeUCS2_EncodeDecimal( 2955 Py_UNICODE* s, 2956 Py_ssize_t length, 2957 char* output, 2958 const(char)* errors); 2959 /// ditto 2960 2961 alias PyUnicodeUCS2_EncodeDecimal PyUnicode_EncodeDecimal; 2962 2963 2964 /** Concat two strings giving a new Unicode string. */ 2965 PyObject* PyUnicodeUCS2_Concat( 2966 PyObject* left, 2967 PyObject* right); 2968 /// ditto 2969 2970 alias PyUnicodeUCS2_Concat PyUnicode_Concat; 2971 2972 2973 version(Python_3_0_Or_Later) { 2974 /** Concat two strings and put the result in *pleft 2975 (sets *pleft to NULL on error) 2976 Params: 2977 pleft = Pointer to left string 2978 right = Right string 2979 */ 2980 /// Availability: 3.* 2981 2982 void PyUnicodeUCS2_Append( 2983 PyObject** pleft, 2984 PyObject* right 2985 ); 2986 /// ditto 2987 2988 alias PyUnicodeUCS2_Append PyUnicode_Append; 2989 2990 2991 /** Concat two strings, put the result in *pleft and drop the right object 2992 (sets *pleft to NULL on error) 2993 Params: 2994 pleft = Pointer to left string right = Right string (dropped by this call) 2995 */ 2996 /// Availability: 3.* 2997 void PyUnicodeUCS2_AppendAndDel( 2998 PyObject** pleft, 2999 PyObject* right 3000 ); 3001 /// ditto 3002 3003 alias PyUnicodeUCS2_AppendAndDel PyUnicode_AppendAndDel; 3004 3005 } 3006 3007 /** Split a string giving a list of Unicode strings. 3008 3009 If sep is NULL, splitting will be done at all whitespace 3010 substrings. Otherwise, splits occur at the given separator. 3011 3012 At most maxsplit splits will be done. If negative, no limit is set. 3013 3014 Separators are not included in the resulting list.
*/ 3030 PyObject* PyUnicodeUCS2_Splitlines( 3031 PyObject* s, 3032 int keepends); 3033 /// ditto 3034 3035 alias PyUnicodeUCS2_Splitlines PyUnicode_Splitlines; 3036 3037 3038 version(Python_2_5_Or_Later) { 3039 /** Partition a string using a given separator. */ 3040 /// Availability: >= 2.5 3041 PyObject* PyUnicodeUCS2_Partition( 3042 PyObject* s, 3043 PyObject* sep 3044 ); 3045 /// ditto 3046 3047 alias PyUnicodeUCS2_Partition PyUnicode_Partition; 3048 3049 3050 /** Partition a string using a given separator, searching from the end 3051 of the string. */ 3052 3053 PyObject* PyUnicodeUCS2_RPartition( 3054 PyObject* s, 3055 PyObject* sep 3056 ); 3057 /// ditto 3058 3059 alias PyUnicodeUCS2_RPartition PyUnicode_RPartition; 3060 3061 } 3062 3063 /** Split a string giving a list of Unicode strings. 3064 3065 If sep is NULL, splitting will be done at all whitespace 3066 substrings. Otherwise, splits occur at the given separator. 3067 3068 At most maxsplit splits will be done. But unlike PyUnicode_Split 3069 PyUnicode_RSplit splits from the end of the string. If negative, 3070 no limit is set. 3071 3072 Separators are not included in the resulting list. 3073 3074 */ 3075 PyObject* PyUnicodeUCS2_RSplit( 3076 PyObject* s, 3077 PyObject* sep, 3078 Py_ssize_t maxsplit); 3079 /// ditto 3080 3081 alias PyUnicodeUCS2_RSplit PyUnicode_RSplit; 3082 3083 3084 /** Translate a string by applying a character mapping table to it and 3085 return the resulting Unicode object. 3086 3087 The mapping table must map Unicode ordinal integers to Unicode 3088 ordinal integers or None (causing deletion of the character). 3089 3090 Mapping tables may be dictionaries or sequences. Unmapped character 3091 ordinals (ones which cause a LookupError) are left untouched and 3092 are copied as-is. 
3093 3094 */ 3095 PyObject* PyUnicodeUCS2_Translate( 3096 PyObject* str, 3097 PyObject* table, 3098 const(char)* errors); 3099 /// ditto 3100 3101 alias PyUnicodeUCS2_Translate PyUnicode_Translate; 3102 3103 3104 /** Join a sequence of strings using the given separator and return 3105 the resulting Unicode string. */ 3106 PyObject* PyUnicodeUCS2_Join( 3107 PyObject* separator, 3108 PyObject* seq); 3109 /// ditto 3110 3111 alias PyUnicodeUCS2_Join PyUnicode_Join; 3112 3113 3114 /** Return 1 if substr matches str[start:end] at the given tail end, 0 3115 otherwise. */ 3116 Py_ssize_t PyUnicodeUCS2_Tailmatch( 3117 PyObject* str, 3118 PyObject* substr, 3119 Py_ssize_t start, 3120 Py_ssize_t end, 3121 int direction 3122 ); 3123 /// ditto 3124 3125 alias PyUnicodeUCS2_Tailmatch PyUnicode_Tailmatch; 3126 3127 3128 /** Return the first position of substr in str[start:end] using the 3129 given search direction or -1 if not found. -2 is returned in case 3130 an error occurred and an exception is set. */ 3131 Py_ssize_t PyUnicodeUCS2_Find( 3132 PyObject* str, 3133 PyObject* substr, 3134 Py_ssize_t start, 3135 Py_ssize_t end, 3136 int direction 3137 ); 3138 /// ditto 3139 3140 alias PyUnicodeUCS2_Find PyUnicode_Find; 3141 3142 3143 /** Count the number of occurrences of substr in str[start:end]. */ 3144 Py_ssize_t PyUnicodeUCS2_Count( 3145 PyObject* str, 3146 PyObject* substr, 3147 Py_ssize_t start, 3148 Py_ssize_t end); 3149 /// ditto 3150 3151 alias PyUnicodeUCS2_Count PyUnicode_Count; 3152 3153 3154 /** Replace at most maxcount occurrences of substr in str with replstr 3155 and return the resulting Unicode object. */ 3156 PyObject* PyUnicodeUCS2_Replace( 3157 PyObject* str, 3158 PyObject* substr, 3159 PyObject* replstr, 3160 Py_ssize_t maxcount 3161 ); 3162 /// ditto 3163 3164 alias PyUnicodeUCS2_Replace PyUnicode_Replace; 3165 3166 3167 /** Compare two strings and return -1, 0, 1 for less than, equal, 3168 greater than resp. 
*/ 3169 int PyUnicodeUCS2_Compare(PyObject* left, PyObject* right); 3170 /// ditto 3171 3172 alias PyUnicodeUCS2_Compare PyUnicode_Compare; 3173 3174 version(Python_3_0_Or_Later) { 3175 /** Compare two strings and return -1, 0, 1 for less than, equal, 3176 greater than resp. 3177 Params: 3178 left = first string (a Python object) 3179 right = ASCII-encoded string 3180 */ 3181 /// Availability: 3.* 3182 int PyUnicodeUCS2_CompareWithASCIIString( 3183 PyObject* left, 3184 const(char)* right 3185 ); 3186 /// ditto 3187 3188 alias PyUnicodeUCS2_CompareWithASCIIString PyUnicode_CompareWithASCIIString; 3189 3190 } 3191 3192 version(Python_2_5_Or_Later) { 3193 /** Rich compare two strings and return one of the following: 3194 3195 - NULL in case an exception was raised 3196 - Py_True or Py_False for successful comparisons 3197 - Py_NotImplemented in case the type combination is unknown 3198 3199 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 3200 case the conversion of the arguments to Unicode fails with a 3201 UnicodeDecodeError. 3202 3203 Possible values for op: 3204 3205 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 3206 3207 */ 3208 /// Availability: >= 2.5 3209 PyObject* PyUnicodeUCS2_RichCompare( 3210 PyObject* left, 3211 PyObject* right, 3212 int op 3213 ); 3214 /// ditto 3215 3216 alias PyUnicodeUCS2_RichCompare PyUnicode_RichCompare; 3217 3218 } 3219 3220 /** Apply an argument tuple or dictionary to a format string and return 3221 the resulting Unicode string. */ 3222 PyObject* PyUnicodeUCS2_Format(PyObject* format, PyObject* args); 3223 /// ditto 3224 3225 alias PyUnicodeUCS2_Format PyUnicode_Format; 3226 3227 3228 /** Checks whether element is contained in container and return 1/0 3229 accordingly. 3230 3231 element has to coerce to a one-element Unicode string. -1 is 3232 returned in case of an error.
*/ 3233 int PyUnicodeUCS2_Contains(PyObject* container, PyObject* element); 3234 /// ditto 3235 3236 alias PyUnicodeUCS2_Contains PyUnicode_Contains; 3237 3238 3239 version(Python_3_0_Or_Later) { 3240 /** Checks whether argument is a valid identifier. */ 3241 /// Availability: 3.* 3242 int PyUnicodeUCS2_IsIdentifier(PyObject* s); 3243 /// ditto 3244 3245 alias PyUnicodeUCS2_IsIdentifier PyUnicode_IsIdentifier; 3246 3247 } 3248 3249 3250 /// _ 3251 int _PyUnicodeUCS2_IsLowercase(Py_UNICODE ch); 3252 /// ditto 3253 3254 alias _PyUnicodeUCS2_IsLowercase _PyUnicode_IsLowercase; 3255 3256 /// _ 3257 int _PyUnicodeUCS2_IsUppercase(Py_UNICODE ch); 3258 /// ditto 3259 3260 alias _PyUnicodeUCS2_IsUppercase _PyUnicode_IsUppercase; 3261 3262 /// _ 3263 int _PyUnicodeUCS2_IsTitlecase(Py_UNICODE ch); 3264 /// ditto 3265 3266 alias _PyUnicodeUCS2_IsTitlecase _PyUnicode_IsTitlecase; 3267 3268 /// _ 3269 int _PyUnicodeUCS2_IsWhitespace(Py_UNICODE ch); 3270 /// ditto 3271 3272 alias _PyUnicodeUCS2_IsWhitespace _PyUnicode_IsWhitespace; 3273 3274 /// _ 3275 int _PyUnicodeUCS2_IsLinebreak(Py_UNICODE ch); 3276 /// ditto 3277 3278 alias _PyUnicodeUCS2_IsLinebreak _PyUnicode_IsLinebreak; 3279 3280 /// _ 3281 Py_UNICODE _PyUnicodeUCS2_ToLowercase(Py_UNICODE ch); 3282 /// ditto 3283 3284 alias _PyUnicodeUCS2_ToLowercase _PyUnicode_ToLowercase; 3285 3286 /// _ 3287 Py_UNICODE _PyUnicodeUCS2_ToUppercase(Py_UNICODE ch); 3288 /// ditto 3289 3290 alias _PyUnicodeUCS2_ToUppercase _PyUnicode_ToUppercase; 3291 3292 /// _ 3293 Py_UNICODE _PyUnicodeUCS2_ToTitlecase(Py_UNICODE ch); 3294 /// ditto 3295 3296 alias _PyUnicodeUCS2_ToTitlecase _PyUnicode_ToTitlecase; 3297 3298 /// _ 3299 int _PyUnicodeUCS2_ToDecimalDigit(Py_UNICODE ch); 3300 /// ditto 3301 3302 alias _PyUnicodeUCS2_ToDecimalDigit _PyUnicode_ToDecimalDigit; 3303 3304 /// _ 3305 int _PyUnicodeUCS2_ToDigit(Py_UNICODE ch); 3306 /// ditto 3307 3308 alias _PyUnicodeUCS2_ToDigit _PyUnicode_ToDigit; 3309 3310 /// _ 3311 double 
_PyUnicodeUCS2_ToNumeric(Py_UNICODE ch); 3312 /// ditto 3313 3314 alias _PyUnicodeUCS2_ToNumeric _PyUnicode_ToNumeric; 3315 3316 /// _ 3317 int _PyUnicodeUCS2_IsDecimalDigit(Py_UNICODE ch); 3318 /// ditto 3319 3320 alias _PyUnicodeUCS2_IsDecimalDigit _PyUnicode_IsDecimalDigit; 3321 3322 /// _ 3323 int _PyUnicodeUCS2_IsDigit(Py_UNICODE ch); 3324 /// ditto 3325 3326 alias _PyUnicodeUCS2_IsDigit _PyUnicode_IsDigit; 3327 3328 /// _ 3329 int _PyUnicodeUCS2_IsNumeric(Py_UNICODE ch); 3330 /// ditto 3331 3332 alias _PyUnicodeUCS2_IsNumeric _PyUnicode_IsNumeric; 3333 3334 /// _ 3335 int _PyUnicodeUCS2_IsAlpha(Py_UNICODE ch); 3336 /// ditto 3337 3338 alias _PyUnicodeUCS2_IsAlpha _PyUnicode_IsAlpha; 3339 3340 }else{ 3341 3342 version(Python_2_6_Or_Later) { 3343 3344 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 3345 size. 3346 3347 u may be NULL which causes the contents to be undefined. It is the 3348 user's responsibility to fill in the needed data afterwards. Note 3349 that modifying the Unicode object contents after construction is 3350 only allowed if u was set to NULL. 3351 3352 The buffer is copied into the new object. 
*/ 3353 /// Availability: >= 2.6 3354 PyObject* PyUnicodeUCS4_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 3355 /// ditto 3356 3357 alias PyUnicodeUCS4_FromUnicode PyUnicode_FromUnicode; 3358 3359 3360 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 3361 /// Availability: >= 2.6 3362 PyObject* PyUnicodeUCS4_FromStringAndSize( 3363 const(char)*u, /* char buffer */ 3364 Py_ssize_t size /* size of buffer */ 3365 ); 3366 /// ditto 3367 3368 alias PyUnicodeUCS4_FromStringAndSize PyUnicode_FromStringAndSize; 3369 3370 3371 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 3372 Latin-1 encoded bytes */ 3373 /// Availability: >= 2.6 3374 PyObject* PyUnicodeUCS4_FromString( 3375 const(char)*u /* string */ 3376 ); 3377 /// ditto 3378 3379 alias PyUnicodeUCS4_FromString PyUnicode_FromString; 3380 3381 /// Availability: >= 2.6 3382 PyObject* PyUnicodeUCS4_FromFormatV(const(char)*, va_list); 3383 /// ditto 3384 3385 alias PyUnicodeUCS4_FromFormatV PyUnicode_FromFormatV; 3386 3387 /// Availability: >= 2.6 3388 PyObject* PyUnicodeUCS4_FromFormat(const(char)*, ...); 3389 /// ditto 3390 3391 alias PyUnicodeUCS4_FromFormat PyUnicode_FromFormat; 3392 3393 3394 /** Format the object based on the format_spec, as defined in PEP 3101 3395 (Advanced String Formatting). 
*/ 3396 /// Availability: >= 2.6 3397 PyObject* _PyUnicodeUCS4_FormatAdvanced(PyObject *obj, 3398 Py_UNICODE *format_spec, 3399 Py_ssize_t format_spec_len); 3400 /// ditto 3401 3402 alias _PyUnicodeUCS4_FormatAdvanced _PyUnicode_FormatAdvanced; 3403 3404 /// Availability: >= 2.6 3405 int PyUnicodeUCS4_ClearFreeList(); 3406 /// ditto 3407 3408 alias PyUnicodeUCS4_ClearFreeList PyUnicode_ClearFreeList; 3409 3410 /** 3411 Params: 3412 string = UTF-7 encoded string 3413 length = size of string 3414 errors = error handling 3415 consumed = bytes consumed 3416 */ 3417 /// Availability: >= 2.6 3418 PyObject* PyUnicodeUCS4_DecodeUTF7Stateful( 3419 const(char)* string, 3420 Py_ssize_t length, 3421 const(char)*errors, 3422 Py_ssize_t *consumed 3423 ); 3424 /// ditto 3425 3426 alias PyUnicodeUCS4_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful; 3427 3428 /** 3429 Params: 3430 string = UTF-32 encoded string 3431 length = size of string 3432 errors = error handling 3433 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3434 */ 3435 /// Availability: >= 2.6 3436 PyObject* PyUnicodeUCS4_DecodeUTF32( 3437 const(char)* string, 3438 Py_ssize_t length, 3439 const(char)*errors, 3440 int *byteorder 3441 ); 3442 /// ditto 3443 3444 alias PyUnicodeUCS4_DecodeUTF32 PyUnicode_DecodeUTF32; 3445 3446 3447 /** 3448 Params: 3449 string = UTF-32 encoded string 3450 length = size of string 3451 errors = error handling 3452 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit consumed = bytes consumed 3453 */ 3454 /// Availability: >= 2.6 3455 PyObject* PyUnicodeUCS4_DecodeUTF32Stateful( 3456 const(char)*string, 3457 Py_ssize_t length, 3458 const(char)*errors, 3459 int *byteorder, 3460 Py_ssize_t *consumed 3461 ); 3462 /// ditto 3463 3464 alias PyUnicodeUCS4_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful; 3465 3466 /** Returns a Python string using the UTF-32 encoding in native byte 3467 order. The string always starts with a BOM mark.
*/ 3468 /// Availability: >= 2.6 3469 3470 PyObject* PyUnicodeUCS4_AsUTF32String( 3471 PyObject *unicode 3472 ); 3473 /// ditto 3474 3475 alias PyUnicodeUCS4_AsUTF32String PyUnicode_AsUTF32String; 3476 3477 3478 /** Returns a Python string object holding the UTF-32 encoded value of 3479 the Unicode data. 3480 3481 If byteorder is not 0, output is written according to the following 3482 byte order: 3483 3484 byteorder == -1: little endian 3485 byteorder == 0: native byte order (writes a BOM mark) 3486 byteorder == 1: big endian 3487 3488 If byteorder is 0, the output string will always start with the 3489 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 3490 prepended. 3491 Params: 3492 data = Unicode char buffer 3493 length = number of Py_UNICODE chars to encode 3494 errors = error handling 3495 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 3496 3497 */ 3498 /// Availability: >= 2.6 3499 PyObject* PyUnicodeUCS4_EncodeUTF32( 3500 const Py_UNICODE *data, 3501 Py_ssize_t length, 3502 const(char)* errors, 3503 int byteorder 3504 ); 3505 /// ditto 3506 3507 alias PyUnicodeUCS4_EncodeUTF32 PyUnicode_EncodeUTF32; 3508 3509 } 3510 3511 /** Return a read-only pointer to the Unicode object's internal 3512 Py_UNICODE buffer. */ 3513 Py_UNICODE* PyUnicodeUCS4_AsUnicode(PyObject* unicode); 3514 /// ditto 3515 3516 alias PyUnicodeUCS4_AsUnicode PyUnicode_AsUnicode; 3517 3518 /** Get the length of the Unicode object. */ 3519 Py_ssize_t PyUnicodeUCS4_GetSize(PyObject* unicode); 3520 /// ditto 3521 3522 alias PyUnicodeUCS4_GetSize PyUnicode_GetSize; 3523 3524 3525 /** Get the maximum ordinal for a Unicode character. */ 3526 Py_UNICODE PyUnicodeUCS4_GetMax(); 3527 /// ditto 3528 3529 alias PyUnicodeUCS4_GetMax PyUnicode_GetMax; 3530 3531 3532 /** Resize an already allocated Unicode object to the new size length. 3533 3534 _*unicode is modified to point to the new (resized) object and 0 3535 returned on success. 
3536 3537 This API may only be called by the function which also called the 3538 Unicode constructor. The refcount on the object must be 1. Otherwise, 3539 an error is returned. 3540 3541 Error handling is implemented as follows: an exception is set, -1 3542 is returned and *unicode left untouched. 3543 Params: 3544 unicode = pointer to the new unicode object. 3545 length = New length. 3546 3547 */ 3548 int PyUnicodeUCS4_Resize(PyObject** unicode, Py_ssize_t length); 3549 /// ditto 3550 3551 alias PyUnicodeUCS4_Resize PyUnicode_Resize; 3552 3553 /** Coerce obj to an Unicode object and return a reference with 3554 _*incremented* refcount. 3555 3556 Coercion is done in the following way: 3557 3558 1. String and other char buffer compatible objects are decoded 3559 under the assumptions that they contain data using the current 3560 default encoding. Decoding is done in "strict" mode. 3561 3562 2. All other objects (including Unicode objects) raise an 3563 exception. 3564 3565 The API returns NULL in case of an error. The caller is responsible 3566 for decref'ing the returned objects. 3567 3568 */ 3569 PyObject* PyUnicodeUCS4_FromEncodedObject( 3570 PyObject* obj, 3571 const(char)* encoding, 3572 const(char)* errors); 3573 /// ditto 3574 3575 alias PyUnicodeUCS4_FromEncodedObject PyUnicode_FromEncodedObject; 3576 3577 3578 /** Coerce obj to an Unicode object and return a reference with 3579 _*incremented* refcount. 3580 3581 Unicode objects are passed back as-is (subclasses are converted to 3582 true Unicode objects), all other objects are delegated to 3583 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 3584 using the default encoding as basis for decoding the object. 3585 3586 The API returns NULL in case of an error. The caller is responsible 3587 for decref'ing the returned objects. 
3588 3589 */ 3590 PyObject* PyUnicodeUCS4_FromObject(PyObject* obj); 3591 /// ditto 3592 3593 alias PyUnicodeUCS4_FromObject PyUnicode_FromObject; 3594 3595 3596 /** Create a Unicode Object from the wchar_t buffer w of the given 3597 size. 3598 3599 The buffer is copied into the new object. */ 3600 PyObject* PyUnicodeUCS4_FromWideChar(const(wchar_t)* w, Py_ssize_t size); 3601 /// ditto 3602 3603 alias PyUnicodeUCS4_FromWideChar PyUnicode_FromWideChar; 3604 3605 3606 /** Copies the Unicode Object contents into the wchar_t buffer w. At 3607 most size wchar_t characters are copied. 3608 3609 Note that the resulting wchar_t string may or may not be 3610 0-terminated. It is the responsibility of the caller to make sure 3611 that the wchar_t string is 0-terminated in case this is required by 3612 the application. 3613 3614 Returns the number of wchar_t characters copied (excluding a 3615 possibly trailing 0-termination character) or -1 in case of an 3616 error. NOTE(review): w is the destination buffer this call writes into, yet it is declared const(wchar_t)* below -- confirm against the C header. */ 3617 Py_ssize_t PyUnicodeUCS4_AsWideChar( 3618 PyUnicodeObject* unicode, 3619 const(wchar_t)* w, 3620 Py_ssize_t size); 3621 /// ditto 3622 3623 alias PyUnicodeUCS4_AsWideChar PyUnicode_AsWideChar; 3624 3625 3626 /** Create a Unicode Object from the given Unicode code point ordinal. 3627 3628 The ordinal must be in range(0x10000) on narrow Python builds 3629 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 3630 raised in case it is not. 3631 3632 */ 3633 PyObject* PyUnicodeUCS4_FromOrdinal(int ordinal); 3634 /// ditto 3635 3636 alias PyUnicodeUCS4_FromOrdinal PyUnicode_FromOrdinal; 3637 3638 3639 /** Return a Python string holding the default encoded value of the 3640 Unicode object. 3641 3642 The resulting string is cached in the Unicode object for subsequent 3643 usage by this function. The cached version is needed to implement 3644 the character buffer interface and will live (at least) as long as 3645 the Unicode object itself. 3646 3647 The refcount of the string is *not* incremented.
3648 3649 _*** Exported for internal use by the interpreter only !!! *** 3650 3651 */ 3652 PyObject* _PyUnicodeUCS4_AsDefaultEncodedString(PyObject *, const(char)*); 3653 /// ditto 3654 3655 alias _PyUnicodeUCS4_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 3656 3657 3658 /** Returns the currently active default encoding. 3659 3660 The default encoding is currently implemented as run-time settable 3661 process global. This may change in future versions of the 3662 interpreter to become a parameter which is managed on a per-thread 3663 basis. 3664 3665 */ 3666 const(char)* PyUnicodeUCS4_GetDefaultEncoding(); 3667 /// ditto 3668 3669 alias PyUnicodeUCS4_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 3670 3671 3672 /** Sets the currently active default encoding. 3673 3674 Returns 0 on success, -1 in case of an error. 3675 3676 */ 3677 int PyUnicodeUCS4_SetDefaultEncoding(const(char)*encoding); 3678 /// ditto 3679 3680 alias PyUnicodeUCS4_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 3681 3682 3683 /** Create a Unicode object by decoding the encoded string s of the 3684 given size. 3685 Params: 3686 s = encoded string 3687 size = size of buffer 3688 encoding = encoding 3689 errors = error handling 3690 */ 3691 PyObject* PyUnicodeUCS4_Decode( 3692 const(char)* s, 3693 Py_ssize_t size, 3694 const(char)* encoding, 3695 const(char)* errors); 3696 /// ditto 3697 3698 alias PyUnicodeUCS4_Decode PyUnicode_Decode; 3699 3700 3701 version(Python_3_0_Or_Later) { 3702 /** Decode a Unicode object unicode and return the result as Python 3703 object. */ 3704 /// Availability: 3.* 3705 3706 PyObject* PyUnicodeUCS4_AsDecodedObject( 3707 PyObject* unicode, 3708 const(char)* encoding, 3709 const(char)* errors 3710 ); 3711 /// ditto 3712 3713 alias PyUnicodeUCS4_AsDecodedObject PyUnicode_AsDecodedObject; 3714 3715 /** Decode a Unicode object unicode and return the result as Unicode 3716 object. 
*/ 3717 /// Availability: 3.* 3718 3719 PyObject* PyUnicodeUCS4_AsDecodedUnicode( 3720 PyObject* unicode, 3721 const(char)* encoding, 3722 const(char)* errors 3723 ); 3724 /// ditto 3725 3726 alias PyUnicodeUCS4_AsDecodedUnicode PyUnicode_AsDecodedUnicode; 3727 3728 } 3729 3730 /** Encodes a Py_UNICODE buffer of the given size and returns a 3731 Python string object. 3732 Params: 3733 s = Unicode char buffer 3734 size = number of Py_UNICODE chars to encode 3735 encoding = encoding 3736 errors = error handling 3737 */ 3738 PyObject* PyUnicodeUCS4_Encode( 3739 Py_UNICODE* s, 3740 Py_ssize_t size, 3741 const(char)* encoding, 3742 const(char)* errors); 3743 /// ditto 3744 3745 alias PyUnicodeUCS4_Encode PyUnicode_Encode; 3746 3747 3748 /** Encodes a Unicode object and returns the result as Python object. 3749 */ 3750 PyObject* PyUnicodeUCS4_AsEncodedObject( 3751 PyObject* unicode, 3752 const(char)* encoding, 3753 const(char)* errors); 3754 /// ditto 3755 3756 alias PyUnicodeUCS4_AsEncodedObject PyUnicode_AsEncodedObject; 3757 3758 3759 /** Encodes a Unicode object and returns the result as Python string 3760 object. */ 3761 PyObject* PyUnicodeUCS4_AsEncodedString( 3762 PyObject* unicode, 3763 const(char)* encoding, 3764 const(char)* errors); 3765 /// ditto 3766 3767 alias PyUnicodeUCS4_AsEncodedString PyUnicode_AsEncodedString; 3768 3769 3770 version(Python_3_0_Or_Later) { 3771 /** Encodes a Unicode object and returns the result as Unicode 3772 object. 
*/ 3773 /// Availability: >= 3.* 3774 PyObject* PyUnicodeUCS4_AsEncodedUnicode( 3775 PyObject* unicode, 3776 const(char)* encoding, 3777 const(char)* errors 3778 ); 3779 /// ditto 3780 3781 alias PyUnicodeUCS4_AsEncodedUnicode PyUnicode_AsEncodedUnicode; 3782 3783 } 3784 3785 /** 3786 Params: 3787 string = UTF-7 encoded string 3788 length = size of string 3789 errors = error handling 3790 */ 3791 PyObject* PyUnicodeUCS4_DecodeUTF7( 3792 const(char)* string, 3793 Py_ssize_t length, 3794 const(char)* errors); 3795 /// ditto 3796 3797 alias PyUnicodeUCS4_DecodeUTF7 PyUnicode_DecodeUTF7; 3798 3799 3800 /** 3801 Params: 3802 data = Unicode char buffer 3803 length = number of Py_UNICODE chars to encode 3804 encodeSetO = Encode RFC2152 Set O characters in base64 3805 encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 3806 errors = error handling 3807 */ 3808 PyObject* PyUnicodeUCS4_EncodeUTF7( 3809 Py_UNICODE* data, 3810 Py_ssize_t length, 3811 int encodeSetO, 3812 int encodeWhiteSpace, 3813 const(char)* errors 3814 ); 3815 /// ditto 3816 3817 alias PyUnicodeUCS4_EncodeUTF7 PyUnicode_EncodeUTF7; 3818 3819 3820 /// _ 3821 PyObject* PyUnicodeUCS4_DecodeUTF8( 3822 const(char)* string, 3823 Py_ssize_t length, 3824 const(char)* errors); 3825 /// ditto 3826 3827 alias PyUnicodeUCS4_DecodeUTF8 PyUnicode_DecodeUTF8; 3828 3829 /// _ 3830 PyObject* PyUnicodeUCS4_DecodeUTF8Stateful( 3831 const(char)* string, 3832 Py_ssize_t length, 3833 const(char)* errors, 3834 Py_ssize_t* consumed 3835 ); 3836 /// ditto 3837 3838 alias PyUnicodeUCS4_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful; 3839 3840 /// _ 3841 PyObject* PyUnicodeUCS4_AsUTF8String(PyObject* unicode); 3842 /// ditto 3843 3844 alias PyUnicodeUCS4_AsUTF8String PyUnicode_AsUTF8String; 3845 3846 /// _ 3847 PyObject* PyUnicodeUCS4_EncodeUTF8( 3848 Py_UNICODE* data, 3849 Py_ssize_t length, 3850 const(char) *errors); 3851 /// ditto 3852 3853 alias PyUnicodeUCS4_EncodeUTF8 PyUnicode_EncodeUTF8; 3854 3855 3856 /**
Decodes length bytes from a UTF-16 encoded buffer string and returns 3857 the corresponding Unicode object. 3858 3859 errors (if non-NULL) defines the error handling. It defaults 3860 to "strict". 3861 3862 If byteorder is non-NULL, the decoder starts decoding using the 3863 given byte order: 3864 3865 *byteorder == -1: little endian 3866 *byteorder == 0: native order 3867 *byteorder == 1: big endian 3868 3869 In native mode, the first two bytes of the stream are checked for a 3870 BOM mark. If found, the BOM mark is analysed, the byte order 3871 adjusted and the BOM skipped. In the other modes, no BOM mark 3872 interpretation is done. After completion, *byteorder is set to the 3873 current byte order at the end of input data. 3874 3875 If byteorder is NULL, the codec starts in native order mode. 3876 3877 */ 3878 PyObject* PyUnicodeUCS4_DecodeUTF16( 3879 const(char)* string, 3880 Py_ssize_t length, 3881 const(char)* errors, 3882 int* byteorder); 3883 /// ditto 3884 3885 alias PyUnicodeUCS4_DecodeUTF16 PyUnicode_DecodeUTF16; 3886 3887 /** 3888 Params: 3889 string = UTF-16 encoded string 3890 length = size of string 3891 errors = error handling 3892 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3893 consumed = bytes consumed 3894 */ 3895 PyObject* PyUnicodeUCS4_DecodeUTF16Stateful( 3896 const(char)* string, 3897 Py_ssize_t length, 3898 const(char)* errors, 3899 int* byteorder, 3900 Py_ssize_t* consumed 3901 ); 3902 /// ditto 3903 3904 alias PyUnicodeUCS4_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful; 3905 3906 /** Returns a Python string using the UTF-16 encoding in native byte 3907 order. The string always starts with a BOM mark. */ 3908 PyObject* PyUnicodeUCS4_AsUTF16String(PyObject *unicode); 3909 /// ditto 3910 3911 alias PyUnicodeUCS4_AsUTF16String PyUnicode_AsUTF16String; 3912 3913 /** Returns a Python string object holding the UTF-16 encoded value of 3914 the Unicode data. 
3915 3916 If byteorder is not 0, output is written according to the following 3917 byte order: 3918 3919 byteorder == -1: little endian 3920 byteorder == 0: native byte order (writes a BOM mark) 3921 byteorder == 1: big endian 3922 3923 If byteorder is 0, the output string will always start with the 3924 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 3925 prepended. 3926 3927 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 3928 UCS-2. This trick makes it possible to add full UTF-16 capabilities 3929 at a later point without compromising the APIs. 3930 3931 */ 3932 PyObject* PyUnicodeUCS4_EncodeUTF16( 3933 Py_UNICODE* data, 3934 Py_ssize_t length, 3935 const(char)* errors, 3936 int byteorder 3937 ); 3938 /// ditto 3939 3940 alias PyUnicodeUCS4_EncodeUTF16 PyUnicode_EncodeUTF16; 3941 3942 3943 /// _ 3944 PyObject* PyUnicodeUCS4_DecodeUnicodeEscape( 3945 const(char)* string, 3946 Py_ssize_t length, 3947 const(char)* errors); 3948 /// ditto 3949 3950 alias PyUnicodeUCS4_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 3951 3952 /// _ 3953 PyObject* PyUnicodeUCS4_AsUnicodeEscapeString( 3954 PyObject* unicode); 3955 /// ditto 3956 3957 alias PyUnicodeUCS4_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 3958 3959 /// _ 3960 PyObject* PyUnicodeUCS4_EncodeUnicodeEscape( 3961 Py_UNICODE* data, 3962 Py_ssize_t length); 3963 /// ditto 3964 3965 alias PyUnicodeUCS4_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 3966 3967 /** 3968 Params: 3969 string = Raw-Unicode-Escape encoded string 3970 length = size of string 3971 errors = error handling 3972 */ 3973 PyObject* PyUnicodeUCS4_DecodeRawUnicodeEscape( 3974 const(char)* string, 3975 Py_ssize_t length, 3976 const(char)* errors); 3977 /// ditto 3978 3979 alias PyUnicodeUCS4_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 3980 3981 /// _ 3982 PyObject* PyUnicodeUCS4_AsRawUnicodeEscapeString(PyObject* unicode); 3983 /// ditto 3984 3985 alias 
PyUnicodeUCS4_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 3986 3987 /// _ 3988 PyObject* PyUnicodeUCS4_EncodeRawUnicodeEscape( 3989 Py_UNICODE* data, Py_ssize_t length); 3990 /// ditto 3991 3992 alias PyUnicodeUCS4_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 3993 3994 3995 /// _ 3996 PyObject* _PyUnicodeUCS4_DecodeUnicodeInternal( 3997 const(char)* string, 3998 Py_ssize_t length, 3999 const(char)* errors); 4000 /// ditto 4001 4002 alias _PyUnicodeUCS4_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 4003 4004 4005 /** 4006 Params: 4007 string = Latin-1 encoded string 4008 length = size of string 4009 errors = error handling 4010 */ 4011 PyObject* PyUnicodeUCS4_DecodeLatin1( 4012 const(char)* string, 4013 Py_ssize_t length, 4014 const(char)* errors); 4015 /// ditto 4016 4017 alias PyUnicodeUCS4_DecodeLatin1 PyUnicode_DecodeLatin1; 4018 4019 /// _ 4020 PyObject* PyUnicodeUCS4_AsLatin1String(PyObject *unicode); 4021 /// ditto 4022 4023 alias PyUnicodeUCS4_AsLatin1String PyUnicode_AsLatin1String; 4024 4025 /** 4026 Params: 4027 data = Unicode char buffer 4028 length = Number of Py_UNICODE chars to encode 4029 errors = error handling 4030 */ 4031 PyObject* PyUnicodeUCS4_EncodeLatin1( 4032 Py_UNICODE* data, 4033 Py_ssize_t length, 4034 const(char)* errors); 4035 /// ditto 4036 4037 alias PyUnicodeUCS4_EncodeLatin1 PyUnicode_EncodeLatin1; 4038 4039 4040 /** 4041 Params: 4042 string = ASCII encoded string 4043 length = size of string 4044 errors = error handling 4045 */ 4046 PyObject* PyUnicodeUCS4_DecodeASCII( 4047 const(char)* string, 4048 Py_ssize_t length, 4049 const(char)* errors); 4050 /// ditto 4051 4052 alias PyUnicodeUCS4_DecodeASCII PyUnicode_DecodeASCII; 4053 4054 /// _ 4055 PyObject* PyUnicodeUCS4_AsASCIIString(PyObject *unicode); 4056 /// ditto 4057 4058 alias PyUnicodeUCS4_AsASCIIString PyUnicode_AsASCIIString; 4059 4060 /** 4061 Params: 4062 data = Unicode char buffer 4063 length = Number of Py_UNICODE
chars to encode 4064 errors = error handling 4065 */ 4066 PyObject* PyUnicodeUCS4_EncodeASCII( 4067 Py_UNICODE* data, 4068 Py_ssize_t length, 4069 const(char)* errors); 4070 /// ditto 4071 4072 alias PyUnicodeUCS4_EncodeASCII PyUnicode_EncodeASCII; 4073 4074 4075 /** 4076 Params: 4077 string = Encoded string 4078 length = size of string 4079 mapping = character mapping (char ordinal -> unicode ordinal) 4080 errors = error handling 4081 */ 4082 PyObject* PyUnicodeUCS4_DecodeCharmap( 4083 const(char)* string, 4084 Py_ssize_t length, 4085 PyObject* mapping, 4086 const(char)* errors 4087 ); 4088 /// ditto 4089 4090 alias PyUnicodeUCS4_DecodeCharmap PyUnicode_DecodeCharmap; 4091 4092 /** 4093 Params: 4094 unicode = Unicode object 4095 mapping = character mapping (unicode ordinal -> char ordinal) 4096 */ 4097 PyObject* PyUnicodeUCS4_AsCharmapString( 4098 PyObject* unicode, 4099 PyObject* mapping); 4100 /// ditto 4101 4102 alias PyUnicodeUCS4_AsCharmapString PyUnicode_AsCharmapString; 4103 4104 /** 4105 Params: 4106 data = Unicode char buffer 4107 length = Number of Py_UNICODE chars to encode 4108 mapping = character mapping (unicode ordinal -> char ordinal) 4109 errors = error handling 4110 */ 4111 PyObject* PyUnicodeUCS4_EncodeCharmap( 4112 Py_UNICODE* data, 4113 Py_ssize_t length, 4114 PyObject* mapping, 4115 const(char)* errors 4116 ); 4117 /// ditto 4118 4119 alias PyUnicodeUCS4_EncodeCharmap PyUnicode_EncodeCharmap; 4120 4121 /** Translate a Py_UNICODE buffer of the given length by applying a 4122 character mapping table to it and return the resulting Unicode 4123 object. 4124 4125 The mapping table must map Unicode ordinal integers to Unicode 4126 ordinal integers or None (causing deletion of the character). 4127 4128 Mapping tables may be dictionaries or sequences. Unmapped character 4129 ordinals (ones which cause a LookupError) are left untouched and 4130 are copied as-is. 
4131 4132 */ 4133 PyObject* PyUnicodeUCS4_TranslateCharmap( 4134 Py_UNICODE* data, 4135 Py_ssize_t length, 4136 PyObject* table, 4137 const(char)* errors 4138 ); 4139 /// ditto 4140 4141 alias PyUnicodeUCS4_TranslateCharmap PyUnicode_TranslateCharmap; 4142 4143 4144 version (Windows) { 4145 /// Availability: Windows only 4146 PyObject* PyUnicodeUCS4_DecodeMBCS( 4147 const(char)* string, 4148 Py_ssize_t length, 4149 const(char)* errors); 4150 /// ditto 4151 4152 alias PyUnicodeUCS4_DecodeMBCS PyUnicode_DecodeMBCS; 4153 4154 /// Availability: Windows only 4155 PyObject* PyUnicodeUCS4_AsMBCSString(PyObject* unicode); 4156 /// ditto 4157 4158 alias PyUnicodeUCS4_AsMBCSString PyUnicode_AsMBCSString; 4159 4160 /// Availability: Windows only 4161 PyObject* PyUnicodeUCS4_EncodeMBCS( 4162 Py_UNICODE* data, 4163 Py_ssize_t length, 4164 const(char)* errors); 4165 /// ditto 4166 4167 alias PyUnicodeUCS4_EncodeMBCS PyUnicode_EncodeMBCS; 4168 4169 } 4170 /** Takes a Unicode string holding a decimal value and writes it into 4171 an output buffer using standard ASCII digit codes. 4172 4173 The output buffer has to provide at least length+1 bytes of storage 4174 area. The output string is 0-terminated. 4175 4176 The encoder converts whitespace to ' ', decimal characters to their 4177 corresponding ASCII digit and all other Latin-1 characters except 4178 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 4179 are treated as errors. This includes embedded NULL bytes. 4180 4181 Error handling is defined by the errors argument: 4182 4183 NULL or "strict": raise a ValueError 4184 "ignore": ignore the wrong characters (these are not copied to the 4185 output buffer) 4186 "replace": replaces illegal characters with '?' 4187 4188 Returns 0 on success, -1 on failure. 
4189 4190 */ 4191 int PyUnicodeUCS4_EncodeDecimal( 4192 Py_UNICODE* s, 4193 Py_ssize_t length, 4194 char* output, 4195 const(char)* errors); 4196 /// ditto 4197 4198 alias PyUnicodeUCS4_EncodeDecimal PyUnicode_EncodeDecimal; 4199 4200 4201 /** Concat two strings giving a new Unicode string. */ 4202 PyObject* PyUnicodeUCS4_Concat( 4203 PyObject* left, 4204 PyObject* right); 4205 /// ditto 4206 4207 alias PyUnicodeUCS4_Concat PyUnicode_Concat; 4208 4209 4210 version(Python_3_0_Or_Later) { 4211 /** Concat two strings and put the result in *pleft 4212 (sets *pleft to NULL on error) 4213 Params: 4214 pleft = Pointer to left string 4215 right = Right string 4216 */ 4217 /// Availability: 3.* 4218 4219 void PyUnicodeUCS4_Append( 4220 PyObject** pleft, 4221 PyObject* right 4222 ); 4223 /// ditto 4224 4225 alias PyUnicodeUCS4_Append PyUnicode_Append; 4226 4227 4228 /** Concat two strings, put the result in *pleft and drop the right object 4229 (sets *pleft to NULL on error) 4230 Params: 4231 pleft = Pointer to left string 4232 */ 4233 /// Availability: 3.* 4234 void PyUnicodeUCS4_AppendAndDel( 4235 PyObject** pleft, 4236 PyObject* right 4237 ); 4238 /// ditto 4239 4240 alias PyUnicodeUCS4_AppendAndDel PyUnicode_AppendAndDel; 4241 4242 } 4243 4244 /** Split a string giving a list of Unicode strings. 4245 4246 If sep is NULL, splitting will be done at all whitespace 4247 substrings. Otherwise, splits occur at the given separator. 4248 4249 At most maxsplit splits will be done. If negative, no limit is set. 4250 4251 Separators are not included in the resulting list. 4252 4253 */ 4254 PyObject* PyUnicodeUCS4_Split( 4255 PyObject* s, 4256 PyObject* sep, 4257 Py_ssize_t maxsplit); 4258 /// ditto 4259 4260 alias PyUnicodeUCS4_Split PyUnicode_Split; 4261 4262 4263 /** Ditto PyUnicode_Split, but split at line breaks. 4264 4265 CRLF is considered to be one line break. Line breaks are not 4266 included in the resulting list. 
*/ 4267 PyObject* PyUnicodeUCS4_Splitlines( 4268 PyObject* s, 4269 int keepends); 4270 /// ditto 4271 4272 alias PyUnicodeUCS4_Splitlines PyUnicode_Splitlines; 4273 4274 4275 version(Python_2_5_Or_Later) { 4276 /** Partition a string using a given separator. */ 4277 /// Availability: >= 2.5 4278 PyObject* PyUnicodeUCS4_Partition( 4279 PyObject* s, 4280 PyObject* sep 4281 ); 4282 /// ditto 4283 4284 alias PyUnicodeUCS4_Partition PyUnicode_Partition; 4285 4286 4287 /** Partition a string using a given separator, searching from the end 4288 of the string. */ 4289 4290 PyObject* PyUnicodeUCS4_RPartition( 4291 PyObject* s, 4292 PyObject* sep 4293 ); 4294 /// ditto 4295 4296 alias PyUnicodeUCS4_RPartition PyUnicode_RPartition; 4297 4298 } 4299 4300 /** Split a string giving a list of Unicode strings. 4301 4302 If sep is NULL, splitting will be done at all whitespace 4303 substrings. Otherwise, splits occur at the given separator. 4304 4305 At most maxsplit splits will be done. But unlike PyUnicode_Split 4306 PyUnicode_RSplit splits from the end of the string. If negative, 4307 no limit is set. 4308 4309 Separators are not included in the resulting list. 4310 4311 */ 4312 PyObject* PyUnicodeUCS4_RSplit( 4313 PyObject* s, 4314 PyObject* sep, 4315 Py_ssize_t maxsplit); 4316 /// ditto 4317 4318 alias PyUnicodeUCS4_RSplit PyUnicode_RSplit; 4319 4320 4321 /** Translate a string by applying a character mapping table to it and 4322 return the resulting Unicode object. 4323 4324 The mapping table must map Unicode ordinal integers to Unicode 4325 ordinal integers or None (causing deletion of the character). 4326 4327 Mapping tables may be dictionaries or sequences. Unmapped character 4328 ordinals (ones which cause a LookupError) are left untouched and 4329 are copied as-is. 
4330 4331 */ 4332 PyObject* PyUnicodeUCS4_Translate( 4333 PyObject* str, 4334 PyObject* table, 4335 const(char)* errors); 4336 /// ditto 4337 4338 alias PyUnicodeUCS4_Translate PyUnicode_Translate; 4339 4340 4341 /** Join a sequence of strings using the given separator and return 4342 the resulting Unicode string. */ 4343 PyObject* PyUnicodeUCS4_Join( 4344 PyObject* separator, 4345 PyObject* seq); 4346 /// ditto 4347 4348 alias PyUnicodeUCS4_Join PyUnicode_Join; 4349 4350 4351 /** Return 1 if substr matches str[start:end] at the given tail end, 0 4352 otherwise. */ 4353 Py_ssize_t PyUnicodeUCS4_Tailmatch( 4354 PyObject* str, 4355 PyObject* substr, 4356 Py_ssize_t start, 4357 Py_ssize_t end, 4358 int direction 4359 ); 4360 /// ditto 4361 4362 alias PyUnicodeUCS4_Tailmatch PyUnicode_Tailmatch; 4363 4364 4365 /** Return the first position of substr in str[start:end] using the 4366 given search direction or -1 if not found. -2 is returned in case 4367 an error occurred and an exception is set. */ 4368 Py_ssize_t PyUnicodeUCS4_Find( 4369 PyObject* str, 4370 PyObject* substr, 4371 Py_ssize_t start, 4372 Py_ssize_t end, 4373 int direction 4374 ); 4375 /// ditto 4376 4377 alias PyUnicodeUCS4_Find PyUnicode_Find; 4378 4379 4380 /** Count the number of occurrences of substr in str[start:end]. */ 4381 Py_ssize_t PyUnicodeUCS4_Count( 4382 PyObject* str, 4383 PyObject* substr, 4384 Py_ssize_t start, 4385 Py_ssize_t end); 4386 /// ditto 4387 4388 alias PyUnicodeUCS4_Count PyUnicode_Count; 4389 4390 4391 /** Replace at most maxcount occurrences of substr in str with replstr 4392 and return the resulting Unicode object. */ 4393 PyObject* PyUnicodeUCS4_Replace( 4394 PyObject* str, 4395 PyObject* substr, 4396 PyObject* replstr, 4397 Py_ssize_t maxcount 4398 ); 4399 /// ditto 4400 4401 alias PyUnicodeUCS4_Replace PyUnicode_Replace; 4402 4403 4404 /** Compare two strings and return -1, 0, 1 for less than, equal, 4405 greater than resp. 
*/ 4406 int PyUnicodeUCS4_Compare(PyObject* left, PyObject* right); 4407 /// ditto 4408 4409 alias PyUnicodeUCS4_Compare PyUnicode_Compare; 4410 4411 version(Python_3_0_Or_Later) { 4412 /** Compare two strings and return -1, 0, 1 for less than, equal, 4413 greater than resp. 4414 Params: 4415 left = string object to compare 4416 right = ASCII-encoded string 4417 */ 4418 /// Availability: 3.* 4419 int PyUnicodeUCS4_CompareWithASCIIString( 4420 PyObject* left, 4421 const(char)* right 4422 ); 4423 /// ditto 4424 4425 alias PyUnicodeUCS4_CompareWithASCIIString PyUnicode_CompareWithASCIIString; 4426 4427 } 4428 4429 version(Python_2_5_Or_Later) { 4430 /** Rich compare two strings and return one of the following: 4431 4432 - NULL in case an exception was raised 4433 - Py_True or Py_False for successful comparisons 4434 - Py_NotImplemented in case the type combination is unknown 4435 4436 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 4437 case the conversion of the arguments to Unicode fails with a 4438 UnicodeDecodeError. 4439 4440 Possible values for op: 4441 4442 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 4443 4444 */ 4445 /// Availability: >= 2.5 4446 PyObject* PyUnicodeUCS4_RichCompare( 4447 PyObject* left, 4448 PyObject* right, 4449 int op 4450 ); 4451 /// ditto 4452 4453 alias PyUnicodeUCS4_RichCompare PyUnicode_RichCompare; 4454 4455 } 4456 4457 /** Apply an argument tuple or dictionary to a format string and return 4458 the resulting Unicode string. */ 4459 PyObject* PyUnicodeUCS4_Format(PyObject* format, PyObject* args); 4460 /// ditto 4461 4462 alias PyUnicodeUCS4_Format PyUnicode_Format; 4463 4464 4465 /** Checks whether element is contained in container and return 1/0 4466 accordingly. 4467 4468 element has to coerce to a one-element Unicode string. -1 is 4469 returned in case of an error.
*/ 4470 int PyUnicodeUCS4_Contains(PyObject* container, PyObject* element); 4471 /// ditto 4472 4473 alias PyUnicodeUCS4_Contains PyUnicode_Contains; 4474 4475 4476 version(Python_3_0_Or_Later) { 4477 /** Checks whether argument is a valid identifier. */ 4478 /// Availability: 3.* 4479 int PyUnicodeUCS4_IsIdentifier(PyObject* s); 4480 /// ditto 4481 4482 alias PyUnicodeUCS4_IsIdentifier PyUnicode_IsIdentifier; 4483 4484 } 4485 4486 4487 /// _ 4488 int _PyUnicodeUCS4_IsLowercase(Py_UNICODE ch); 4489 /// ditto 4490 4491 alias _PyUnicodeUCS4_IsLowercase _PyUnicode_IsLowercase; 4492 4493 /// _ 4494 int _PyUnicodeUCS4_IsUppercase(Py_UNICODE ch); 4495 /// ditto 4496 4497 alias _PyUnicodeUCS4_IsUppercase _PyUnicode_IsUppercase; 4498 4499 /// _ 4500 int _PyUnicodeUCS4_IsTitlecase(Py_UNICODE ch); 4501 /// ditto 4502 4503 alias _PyUnicodeUCS4_IsTitlecase _PyUnicode_IsTitlecase; 4504 4505 /// _ 4506 int _PyUnicodeUCS4_IsWhitespace(Py_UNICODE ch); 4507 /// ditto 4508 4509 alias _PyUnicodeUCS4_IsWhitespace _PyUnicode_IsWhitespace; 4510 4511 /// _ 4512 int _PyUnicodeUCS4_IsLinebreak(Py_UNICODE ch); 4513 /// ditto 4514 4515 alias _PyUnicodeUCS4_IsLinebreak _PyUnicode_IsLinebreak; 4516 4517 /// _ 4518 Py_UNICODE _PyUnicodeUCS4_ToLowercase(Py_UNICODE ch); 4519 /// ditto 4520 4521 alias _PyUnicodeUCS4_ToLowercase _PyUnicode_ToLowercase; 4522 4523 /// _ 4524 Py_UNICODE _PyUnicodeUCS4_ToUppercase(Py_UNICODE ch); 4525 /// ditto 4526 4527 alias _PyUnicodeUCS4_ToUppercase _PyUnicode_ToUppercase; 4528 4529 /// _ 4530 Py_UNICODE _PyUnicodeUCS4_ToTitlecase(Py_UNICODE ch); 4531 /// ditto 4532 4533 alias _PyUnicodeUCS4_ToTitlecase _PyUnicode_ToTitlecase; 4534 4535 /// _ 4536 int _PyUnicodeUCS4_ToDecimalDigit(Py_UNICODE ch); 4537 /// ditto 4538 4539 alias _PyUnicodeUCS4_ToDecimalDigit _PyUnicode_ToDecimalDigit; 4540 4541 /// _ 4542 int _PyUnicodeUCS4_ToDigit(Py_UNICODE ch); 4543 /// ditto 4544 4545 alias _PyUnicodeUCS4_ToDigit _PyUnicode_ToDigit; 4546 4547 /// _ 4548 double 
_PyUnicodeUCS4_ToNumeric(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS4_ToNumeric _PyUnicode_ToNumeric;

    /// _
    int _PyUnicodeUCS4_IsDecimalDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS4_IsDecimalDigit _PyUnicode_IsDecimalDigit;

    /// _
    int _PyUnicodeUCS4_IsDigit(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS4_IsDigit _PyUnicode_IsDigit;

    /// _
    int _PyUnicodeUCS4_IsNumeric(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS4_IsNumeric _PyUnicode_IsNumeric;

    /// _
    int _PyUnicodeUCS4_IsAlpha(Py_UNICODE ch);
    /// ditto
    alias _PyUnicodeUCS4_IsAlpha _PyUnicode_IsAlpha;

}
version(Python_3_0_Or_Later) {
    /// Availability: 3.*
    size_t Py_UNICODE_strlen(const(Py_UNICODE)* u);

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strcpy(Py_UNICODE* s1, const(Py_UNICODE)* s2);

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strcat(Py_UNICODE* s1, const(Py_UNICODE)* s2);
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strncpy(
            Py_UNICODE* s1,
            const(Py_UNICODE)* s2,
            size_t n);

    /// Availability: 3.*
    int Py_UNICODE_strcmp(
            const(Py_UNICODE)* s1,
            const(Py_UNICODE)* s2
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        int Py_UNICODE_strncmp(
                const(Py_UNICODE)* s1,
                const(Py_UNICODE)* s2,
                size_t n
                );
    }

    /// Availability: 3.*
    Py_UNICODE* Py_UNICODE_strchr(
            const(Py_UNICODE)* s,
            Py_UNICODE c
            );

    version(Python_3_2_Or_Later) {
        /// Availability: >= 3.2
        Py_UNICODE* Py_UNICODE_strrchr(
                const(Py_UNICODE)* s,
                Py_UNICODE c
                );
    }

    version(Python_3_5_Or_Later) {
        /// Availability: >= 3.5
        PyObject* _PyUnicode_FormatLong(PyObject*, int, int, int);
    }

    version(Python_3_2_Or_Later) {
        /** Create a copy of a unicode string ending with a nul character.
          Return NULL and raise a MemoryError exception on memory allocation
          failure, otherwise return a newly allocated buffer (use
          PyMem_Free() to free the buffer). */
        /// Availability: >= 3.2
        Py_UNICODE* PyUnicode_AsUnicodeCopy(
                PyObject* unicode
                );
    }
}


/// _
int _PyUnicode_IsTitlecase(
    Py_UCS4 ch       /* Unicode character */
    );

/// _
int _PyUnicode_IsXidStart(
    Py_UCS4 ch       /* Unicode character */
    );

/** Externally visible for str.strip(unicode) */
PyObject* _PyUnicode_XStrip(PyUnicodeObject* self, int striptype,
        PyObject *sepobj
        );

version(Python_3_0_Or_Later) {
    version(Python_3_2_Or_Later) {
        /** Using the current locale, insert the thousands grouping
          into the string pointed to by buffer.  For the argument descriptions,
          see Objects/stringlib/localeutil.h */
        /// Availability: >= 3.2
        Py_ssize_t _PyUnicode_InsertThousandsGroupingLocale(
                Py_UNICODE* buffer,
                Py_ssize_t n_buffer,
                Py_UNICODE* digits,
                Py_ssize_t n_digits,
                Py_ssize_t min_width);
    }

    /** Using explicit passed-in values, insert the thousands grouping
      into the string pointed to by buffer.  For the argument descriptions,
      see Objects/stringlib/localeutil.h */
    /// Availability: 3.*
    Py_ssize_t _PyUnicode_InsertThousandsGrouping(
            Py_UNICODE* buffer,
            Py_ssize_t n_buffer,
            Py_UNICODE* digits,
            Py_ssize_t n_digits,
            Py_ssize_t min_width,
            const(char)* grouping,
            const(char)* thousands_sep);
}

version(Python_3_2_Or_Later) {
    /// Availability: >= 3.2
    PyObject* PyUnicode_TransformDecimalToASCII(
            Py_UNICODE *s,              /* Unicode buffer */
            Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
            );

    /* --- File system encoding ---------------------------------------------- */

    /** ParseTuple converter: encode str objects to bytes using
      PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
    /// Availability: >= 3.2
    int PyUnicode_FSConverter(PyObject*, void*);

    /** ParseTuple converter: decode bytes objects to unicode using
      PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
    /// Availability: >= 3.2
    int PyUnicode_FSDecoder(PyObject*, void*);

    /** Decode a null-terminated string using Py_FileSystemDefaultEncoding
      and the "surrogateescape" error handler.

      If Py_FileSystemDefaultEncoding is not set, fall back to the locale
      encoding.

      Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
     */
    /// Availability: >= 3.2
    PyObject* PyUnicode_DecodeFSDefault(
            const(char)* s               /* encoded string */
            );

    /** Decode a string using Py_FileSystemDefaultEncoding
      and the "surrogateescape" error handler.

      If Py_FileSystemDefaultEncoding is not set, fall back to the locale
      encoding.
     */
    /// Availability: >= 3.2
    PyObject* PyUnicode_DecodeFSDefaultAndSize(
            const(char)* s,               /* encoded string */
            Py_ssize_t size              /* size */
            );

    /** Encode a Unicode object to Py_FileSystemDefaultEncoding with the
      "surrogateescape" error handler, and return bytes.

      If Py_FileSystemDefaultEncoding is not set, fall back to the locale
      encoding.
     */
    /// Availability: >= 3.2
    PyObject* PyUnicode_EncodeFSDefault(
            PyObject* unicode
            );
}

/*
alias _PyUnicode_IsWhitespace Py_UNICODE_ISSPACE;
alias _PyUnicode_IsLowercase Py_UNICODE_ISLOWER;
alias _PyUnicode_IsUppercase Py_UNICODE_ISUPPER;
alias _PyUnicode_IsTitlecase Py_UNICODE_ISTITLE;
alias _PyUnicode_IsLinebreak Py_UNICODE_ISLINEBREAK;
alias _PyUnicode_ToLowercase Py_UNICODE_TOLOWER;
alias _PyUnicode_ToUppercase Py_UNICODE_TOUPPER;
alias _PyUnicode_ToTitlecase Py_UNICODE_TOTITLE;
alias _PyUnicode_IsDecimalDigit Py_UNICODE_ISDECIMAL;
alias _PyUnicode_IsDigit Py_UNICODE_ISDIGIT;
alias _PyUnicode_IsNumeric Py_UNICODE_ISNUMERIC;
alias _PyUnicode_ToDecimalDigit Py_UNICODE_TODECIMAL;
alias _PyUnicode_ToDigit Py_UNICODE_TODIGIT;
alias _PyUnicode_ToNumeric Py_UNICODE_TONUMERIC;
alias _PyUnicode_IsAlpha Py_UNICODE_ISALPHA;
 */

/**
  Returns non-zero when any of Py_UNICODE_ISALPHA, Py_UNICODE_ISDECIMAL,
  Py_UNICODE_ISDIGIT or Py_UNICODE_ISNUMERIC is non-zero for `ch`.

  Declared as a template, so the body is only compiled when it is used.

  NOTE(review): the only aliases for the four Py_UNICODE_IS* names visible
  near this function are inside the commented-out block above; confirm they
  are declared elsewhere in this module, otherwise instantiation will fail.
 */
int Py_UNICODE_ISALNUM()(Py_UNICODE ch) {
    return (
            Py_UNICODE_ISALPHA(ch)
            || Py_UNICODE_ISDECIMAL(ch)
            || Py_UNICODE_ISDIGIT(ch)
            || Py_UNICODE_ISNUMERIC(ch)
           );
}

/**
  Copies `length` Py_UNICODE code units from `source` to `target` with
  memcpy.

  Params:
    target = destination buffer
    source = buffer to copy from; it is only read
    length = number of Py_UNICODE elements (not bytes) to copy

  The regions must not overlap, as required by memcpy.
 */
void Py_UNICODE_COPY()(void* target, const(void)* source, size_t length) {
    // Pass the byte count as size_t: narrowing it to uint would silently
    // truncate copies of 4 GiB or more on 64-bit targets.
    memcpy(target, source, length * Py_UNICODE.sizeof);
}

/**
  Stores `value` into each of the first `length` elements of `target`.

  Params:
    target = buffer to fill
    value  = code unit written to every element
    length = number of Py_UNICODE elements to write
 */
void Py_UNICODE_FILL()(Py_UNICODE* target, Py_UNICODE value, size_t length) {
    target[0 .. length] = value;
}

/**
  Returns non-zero when `substring` occurs in `string` at `offset`.

  The first code unit is compared directly before falling back to memcmp
  over `substring.length` code units. No bounds checking is performed on
  `offset` here.

  Params:
    string    = unicode object to look in (its `str` buffer is read)
    offset    = index, in code units, into `string.str`
    substring = unicode object to look for (`str` and `length` are read)
 */
int Py_UNICODE_MATCH()(PyUnicodeObject* string, size_t offset,
        PyUnicodeObject* substring
        )
{
    return (
            (*(string.str + offset) == *(substring.str))
            && !memcmp(string.str + offset, substring.str,
                substring.length * Py_UNICODE.sizeof
                )
           );
}
