Changeset 8799
- Timestamp:
- Jul 13, 2009, 10:52:08 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libtransmission/JSON_parser.c
r8409 r8799 33 33 34 34 Changelog: 35 2009-05-17 36 Incorporated benrudiak@googlemail.com fix for UTF16 decoding. 37 35 38 2009-05-14 36 39 Fixed float parsing bug related to a locale being set that didn't … … 66 69 67 70 #include "JSON_parser.h" 68 #include "ConvertUTF.h"69 71 70 72 #ifdef _MSC_VER … … 88 90 #endif 89 91 92 typedef unsigned short UTF16; 90 93 91 94 struct JSON_parser_struct { … … 93 96 void* ctx; 94 97 signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually; 95 UTF16 utf16_ decode_buffer[2];98 UTF16 utf16_high_surrogate; 96 99 long depth; 97 100 long top; … … 237 240 IX = -20, /* integer detected by 1-9 */ 238 241 EX = -21, /* next char is escaped */ 239 UC = -22 ,/* Unicode character read */242 UC = -22 /* Unicode character read */ 240 243 }; 241 244 … … 518 521 } 519 522 523 #define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800) 524 #define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00) 525 #define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000) 526 static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 }; 527 520 528 static int decode_unicode_char(JSON_parser jc) 521 529 { 522 const unsigned chars = jc->utf16_decode_buffer[0] ? 2 : 1;523 530 int i; 524 UTF16 *uc = chars == 1 ? &jc->utf16_decode_buffer[0] : &jc->utf16_decode_buffer[1]; 525 UTF16 x; 531 unsigned uc = 0; 526 532 char* p; 533 int trail_bytes; 527 534 528 535 assert(jc->parse_buffer_count >= 6); … … 530 537 p = &jc->parse_buffer[jc->parse_buffer_count - 4]; 531 538 532 for (i = 0; i < 4; ++i, ++p) {533 x = *p;539 for (i = 12; i >= 0; i -= 4, ++p) { 540 unsigned x = *p; 534 541 535 542 if (x >= 'a') { … … 538 545 x -= ('A' - 10); 539 546 } else { 540 x &= ~ ((UTF16) 0x30);547 x &= ~0x30u; 541 548 } 542 549 543 550 assert(x < 16); 544 551 545 *uc |= x << ((3u - i) << 2);546 } 547 548 /* clear UTF-16 char f orm buffer */552 uc |= x << i; 553 } 554 555 /* clear UTF-16 char from buffer */ 549 556 jc->parse_buffer_count -= 6; 550 557 jc->parse_buffer[jc->parse_buffer_count] = 0; 551 558 552 559 /* attempt decoding ... */ 553 { 554 UTF8* dec_start = (UTF8*)&jc->parse_buffer[jc->parse_buffer_count]; 555 UTF8* dec_start_dup = dec_start; 556 UTF8* dec_end = dec_start + 6; 557 558 const UTF16* enc_start = &jc->utf16_decode_buffer[0]; 559 const UTF16* enc_end = enc_start + chars; 560 561 const ConversionResult result = ConvertUTF16toUTF8( 562 &enc_start, enc_end, &dec_start, dec_end, strictConversion); 563 564 const size_t new_chars = dec_start - dec_start_dup; 565 566 /* was it a surrogate UTF-16 char? */ 567 if (chars == 1 && result == sourceExhausted) { 560 if (jc->utf16_high_surrogate) { 561 if (IS_LOW_SURROGATE(uc)) { 562 uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc); 563 trail_bytes = 3; 564 jc->utf16_high_surrogate = 0; 565 } else { 566 /* high surrogate without a following low surrogate */ 567 return false; 568 } 569 } else { 570 if (uc < 0x80) { 571 trail_bytes = 0; 572 } else if (uc < 0x800) { 573 trail_bytes = 1; 574 } else if (IS_HIGH_SURROGATE(uc)) { 575 /* save the high surrogate and wait for the low surrogate */ 576 jc->utf16_high_surrogate = uc; 568 577 return true; 569 } 570 571 if (result != conversionOK) { 578 } else if (IS_LOW_SURROGATE(uc)) { 579 /* low surrogate without a preceding high surrogate */ 572 580 return false; 573 } 574 575 /* NOTE: clear decode buffer to resume string reading, 576 otherwise we continue to read UTF-16 */ 577 jc->utf16_decode_buffer[0] = 0; 578 579 assert(new_chars <= 6); 580 581 jc->parse_buffer_count += new_chars; 582 jc->parse_buffer[jc->parse_buffer_count] = 0; 583 } 581 } else { 582 trail_bytes = 2; 583 } 584 } 585 586 jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]); 587 588 for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) { 589 jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80); 590 } 591 592 jc->parse_buffer[jc->parse_buffer_count] = 0; 584 593 585 594 return true; … … 695 704 } 696 705 /* check if we need to read a second UTF-16 char */ 697 if (jc->utf16_ decode_buffer[0]) {706 if (jc->utf16_high_surrogate) { 698 707 jc->state = D1; 699 708 } else {
Note: See TracChangeset
for help on using the changeset viewer.