Changeset 2780
- Timestamp:
- Aug 15, 2007, 11:02:56 PM (15 years ago)
- Location:
- trunk/libtransmission
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libtransmission/metainfo.c
r2747 r2780 57 57 static int parseFiles( tr_info_t * inf, benc_val_t * name, 58 58 benc_val_t * files, benc_val_t * length ); 59 static void strcatUTF8( char *, int, const char *, int );60 59 61 60 /*********************************************************************** … … 206 205 if( NULL != val && TYPE_STR == val->type ) 207 206 { 208 str catUTF8( inf->comment, sizeof( inf->comment ), val->val.s.s, 0 );207 strlcat_utf8( inf->comment, val->val.s.s, sizeof( inf->comment ), 0 ); 209 208 } 210 209 … … 213 212 if( NULL != val && TYPE_STR == val->type ) 214 213 { 215 str catUTF8( inf->creator, sizeof( inf->creator ), val->val.s.s, 0 );214 strlcat_utf8( inf->creator, val->val.s.s, sizeof( inf->creator ), 0 ); 216 215 } 217 216 … … 371 370 } 372 371 373 str catUTF8( buf, size, prefix, 0 );372 strlcat_utf8( buf, prefix, size, 0 ); 374 373 for( ii = 0; jj > ii; ii++ ) 375 374 { 376 str catUTF8( buf, size, TR_PATH_DELIMITER_STR, 0 );377 str catUTF8( buf, size, list[ii], 1);375 strlcat_utf8( buf, TR_PATH_DELIMITER_STR, size, 0 ); 376 strlcat_utf8( buf, list[ii], size, TR_PATH_DELIMITER ); 378 377 } 379 378 free( list ); … … 691 690 } 692 691 693 strcatUTF8( inf->name, sizeof( inf->name ), name->val.s.s, 1 ); 692 strlcat_utf8( inf->name, name->val.s.s, sizeof( inf->name ), 693 TR_PATH_DELIMITER ); 694 694 if( '\0' == inf->name[0] ) 695 695 { … … 745 745 } 746 746 747 str catUTF8( inf->files[0].name, sizeof( inf->files[0].name ),748 name->val.s.s, 1);747 strlcat_utf8( inf->files[0].name, name->val.s.s, 748 sizeof( inf->files[0].name ), TR_PATH_DELIMITER ); 749 749 750 750 inf->files[0].length = length->val.i; … … 760 760 return TR_OK; 761 761 } 762 763 /***********************************************************************764 * strcatUTF8765 ***********************************************************************766 * According to the official specification, all strings in the torrent767 * file are supposed to be UTF-8 encoded. 
However, there are768 * non-compliant torrents around... If we encounter an invalid UTF-8769 * character, we assume it is ISO 8859-1 and convert it to UTF-8.770 **********************************************************************/771 #define WANTBYTES( want, got ) \772 if( (want) > (got) ) { return; } else { (got) -= (want); }773 static void strcatUTF8( char * s, int len, const char * append, int deslash )774 {775 const char * p;776 777 /* don't overwrite the nul at the end */778 len--;779 780 /* Go to the end of the destination string */781 while( s[0] )782 {783 s++;784 len--;785 }786 787 /* Now start appending, converting on the fly if necessary */788 for( p = append; p[0]; )789 {790 /* skip over / if requested */791 if( deslash && '/' == p[0] )792 {793 p++;794 continue;795 }796 797 if( !( p[0] & 0x80 ) )798 {799 /* ASCII character */800 WANTBYTES( 1, len );801 *(s++) = *(p++);802 continue;803 }804 805 if( ( p[0] & 0xE0 ) == 0xC0 && ( p[1] & 0xC0 ) == 0x80 )806 {807 /* 2-bytes UTF-8 character */808 WANTBYTES( 2, len );809 *(s++) = *(p++); *(s++) = *(p++);810 continue;811 }812 813 if( ( p[0] & 0xF0 ) == 0xE0 && ( p[1] & 0xC0 ) == 0x80 &&814 ( p[2] & 0xC0 ) == 0x80 )815 {816 /* 3-bytes UTF-8 character */817 WANTBYTES( 3, len );818 *(s++) = *(p++); *(s++) = *(p++);819 *(s++) = *(p++);820 continue;821 }822 823 if( ( p[0] & 0xF8 ) == 0xF0 && ( p[1] & 0xC0 ) == 0x80 &&824 ( p[2] & 0xC0 ) == 0x80 && ( p[3] & 0xC0 ) == 0x80 )825 {826 /* 4-bytes UTF-8 character */827 WANTBYTES( 4, len );828 *(s++) = *(p++); *(s++) = *(p++);829 *(s++) = *(p++); *(s++) = *(p++);830 continue;831 }832 833 /* ISO 8859-1 -> UTF-8 conversion */834 WANTBYTES( 2, len );835 *(s++) = 0xC0 | ( ( *p & 0xFF ) >> 6 );836 *(s++) = 0x80 | ( *(p++) & 0x3F );837 }838 } -
trunk/libtransmission/utils.c
/* utils.c, r2737 -> r2780: the code below is appended after the unchanged
 * lines 730-731 (`#endif` and `}`), which close a preceding definition that
 * is not shown in this view. */

/***********************************************************************
 * utf8_sequence_length
 ***********************************************************************
 * Returns the length in bytes (2, 3 or 4) of the multi-byte UTF-8
 * sequence starting at p, or 0 if p[0] is not a lead byte followed by
 * the required number of continuation bytes (10xxxxxx).
 * The && chain stops at the first byte that is not a continuation byte,
 * which includes the terminating nul, so the string is never read past
 * its end.
 **********************************************************************/
static size_t
utf8_sequence_length( const unsigned char * p )
{
    if( ( p[0] & 0xE0 ) == 0xC0 && ( p[1] & 0xC0 ) == 0x80 )
    {
        return 2;
    }
    if( ( p[0] & 0xF0 ) == 0xE0 && ( p[1] & 0xC0 ) == 0x80 &&
        ( p[2] & 0xC0 ) == 0x80 )
    {
        return 3;
    }
    if( ( p[0] & 0xF8 ) == 0xF0 && ( p[1] & 0xC0 ) == 0x80 &&
        ( p[2] & 0xC0 ) == 0x80 && ( p[3] & 0xC0 ) == 0x80 )
    {
        return 4;
    }
    return 0;
}

/***********************************************************************
 * strlcat_utf8
 ***********************************************************************
 * Appends the nul-terminated string src to the nul-terminated string
 * held in dest, a buffer of len bytes in total.
 *
 * Bytes of src that form ASCII characters or well-formed 2-, 3- or
 * 4-byte UTF-8 sequences are copied unchanged. Any other byte is
 * assumed to be ISO 8859-1 and is converted to its 2-byte UTF-8 form.
 * Every byte of src equal to skip is dropped; pass 0 to drop nothing.
 *
 * Appending stops before the first character that does not fit
 * completely, so a multi-byte character is never cut in half. The
 * result is always nul-terminated, even if dest was not zero-filled
 * beforehand. If len is 0, or dest holds no nul within its first len
 * bytes, dest is left untouched.
 **********************************************************************/
void
strlcat_utf8( void * dest, const void * src, size_t len, char skip )
{
    unsigned char       * s = dest;
    const unsigned char * p = src;
    size_t                room;
    size_t                n;

    if( 0 == len )
    {
        return;
    }

    /* keep one byte in reserve for the nul at the end */
    room = len - 1;

    /* Go to the end of the destination string, staying inside the buffer */
    while( '\0' != *s )
    {
        if( 0 == room )
        {
            /* no terminator within len bytes: nowhere safe to append */
            return;
        }
        s++;
        room--;
    }

    /* Now start appending, converting on the fly if necessary */
    while( '\0' != *p )
    {
        /* skip over the requested character */
        if( (unsigned char) skip == *p )
        {
            p++;
            continue;
        }

        /* ASCII is one byte; otherwise ask for the UTF-8 sequence length */
        n = ( *p & 0x80 ) ? utf8_sequence_length( p ) : 1;

        if( 0 < n )
        {
            /* ASCII or well-formed UTF-8: copy the bytes as they are */
            if( n > room )
            {
                break;
            }
            room -= n;
            while( 0 < n-- )
            {
                *(s++) = *(p++);
            }
        }
        else
        {
            /* ISO 8859-1 -> UTF-8 conversion */
            if( 2 > room )
            {
                break;
            }
            room -= 2;
            *(s++) = (unsigned char)( 0xC0 | ( *p >> 6 ) );
            *(s++) = (unsigned char)( 0x80 | ( *p & 0x3F ) );
            p++;
        }
    }

    /* always in bounds: one byte was held back above */
    *s = '\0';
}

/***********************************************************************
 * bufsize_utf8
 ***********************************************************************
 * Returns the number of bytes, terminating nul included, needed to hold
 * vstr after the conversion performed by strlcat_utf8 with nothing
 * skipped: the length of vstr, plus one byte for every byte that would
 * be converted from ISO 8859-1, plus one for the nul.
 * If changed is not NULL, *changed is set to 1 when at least one byte
 * would be converted and to 0 otherwise.
 **********************************************************************/
size_t
bufsize_utf8( const void * vstr, int * changed )
{
    const unsigned char * str = vstr;
    size_t                ii = 0;
    size_t                grow = 1; /* the terminating nul */
    size_t                n;

    if( NULL != changed )
    {
        *changed = 0;
    }

    while( '\0' != str[ii] )
    {
        n = ( str[ii] & 0x80 ) ? utf8_sequence_length( str + ii ) : 1;

        if( 0 < n )
        {
            /* ASCII or well-formed UTF-8: same size after conversion */
            ii += n;
        }
        else
        {
            /* ISO 8859-1 -> UTF-8 conversion: one byte becomes two */
            ii++;
            grow++;
            if( NULL != changed )
            {
                *changed = 1;
            }
        }
    }

    return ii + grow;
}
trunk/libtransmission/utils.h
r2757 r2780 89 89 void tr_wait( uint64_t delay_milliseconds ); 90 90 91 /*********************************************************************** 92 * strlcat_utf8 93 *********************************************************************** 94 * According to the official specification, all strings in the torrent 95 * file are supposed to be UTF-8 encoded. However, there are 96 * non-compliant torrents around... If we encounter an invalid UTF-8 97 * character, we assume it is ISO 8859-1 and convert it to UTF-8. 98 **********************************************************************/ 99 void strlcat_utf8( void *, const void *, size_t, char ); 100 size_t bufsize_utf8( const void *, int * ); 101 91 102 /*** 92 103 ****
Note: See TracChangeset
for help on using the changeset viewer.