Changeset 10963


Ignore:
Timestamp: Jul 7, 2010, 4:48:23 PM (11 years ago)
Author: charles
Message:

(2.0x trunk) #3397 "checksum errors when downloading files whose names are encoded in iso-8859-1" -- fixed

Location:
trunk
Files:
5 edited

Legend:

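Unmodified (row starts with the old and new line numbers run together, e.g. "105105" = old line 105, new line 105)
Added (row starts with the new line number only)
Removed (row starts with the old line number only)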
  • trunk/configure.ac

    r10927 r10963  
    105105AC_HEADER_TIME
    106106
    107 AC_CHECK_FUNCS([pread pwrite lrintf strlcpy daemon dirname basename strcasecmp localtime_r fallocate64 posix_fallocate memmem strtold syslog valloc getpagesize posix_memalign clearenv])
     107AC_CHECK_FUNCS([iconv_open pread pwrite lrintf strlcpy daemon dirname basename strcasecmp localtime_r fallocate64 posix_fallocate memmem strtold syslog valloc getpagesize posix_memalign clearenv])
    108108AC_PROG_INSTALL
    109109AC_PROG_MAKE_SET
  • trunk/libtransmission/metainfo.c

    r10774 r10963  
    170170        }
    171171
    172         *setme = tr_utf8clean( (char*)EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ), NULL );
     172        *setme = tr_utf8clean( (char*)EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
    173173        /* fprintf( stderr, "[%s]\n", *setme ); */
    174174        evbuffer_free( buf );
     
    413413    tr_benc *       infoDict = NULL;
    414414    tr_benc *       meta = (tr_benc *) meta_in;
    415     tr_bool         err;
    416415    tr_bool         b;
    417416    tr_bool         isMagnet = FALSE;
     
    472471            return "name";
    473472        tr_free( inf->name );
    474         inf->name = tr_utf8clean( str, -1, &err );
     473        inf->name = tr_utf8clean( str, -1 );
    475474    }
    476475
     
    480479            str = "";
    481480    tr_free( inf->comment );
    482     inf->comment = tr_utf8clean( str, -1, &err );
     481    inf->comment = tr_utf8clean( str, -1 );
    483482
    484483    /* created by */
     
    487486            str = "";
    488487    tr_free( inf->creator );
    489     inf->creator = tr_utf8clean( str, -1, &err );
     488    inf->creator = tr_utf8clean( str, -1 );
    490489
    491490    /* creation date */
  • trunk/libtransmission/utils-test.c

    r10931 r10963  
    175175    const char * in;
    176176    char * out;
    177     tr_bool err;
    178177
    179178    in = "hello world";
    180     out = tr_utf8clean( in, -1, &err );
    181     check( err == FALSE )
     179    out = tr_utf8clean( in, -1 );
    182180    check( out != NULL )
    183181    check( !strcmp( out, in ) )
     
    185183
    186184    in = "hello world";
    187     out = tr_utf8clean( in, 5, &err );
    188     check( err == FALSE )
     185    out = tr_utf8clean( in, 5 );
    189186    check( out != NULL )
    190187    check( !strcmp( out, "hello" ) )
     
    193190    /* this version is not utf-8 */
    194191    in = "Òðóäíî áûòü Áîãîì";
    195     out = tr_utf8clean( in, 17, &err );
    196     check( out != NULL )
    197     check( err != 0 )
     192    out = tr_utf8clean( in, 17 );
     193    check( out != NULL )
    198194    check( strlen( out ) == 17 )
    199195    check( tr_utf8_validate( out, -1, NULL ) )
     
    202198    /* same string, but utf-8 clean */
    203199    in = "ÒðóÀíî áûòÌ Áîãîì";
    204     out = tr_utf8clean( in, -1, &err );
    205     check( out != NULL )
    206     check( !err );
     200    out = tr_utf8clean( in, -1 );
     201    check( out != NULL )
    207202    check( tr_utf8_validate( out, -1, NULL ) )
    208203    check ( !strcmp( in, out ) )
  • trunk/libtransmission/utils.c

    r10956 r10963  
    1717#if defined(SYS_DARWIN)
    1818 #define HAVE_GETPAGESIZE
     19 #define HAVE_ICONV_OPEN
    1920 #define HAVE_VALLOC
    2021 #undef HAVE_POSIX_MEMALIGN /* not supported on OS X 10.5 and lower */
     
    3132#include <time.h> /* nanosleep() */
    3233
     34#ifdef HAVE_ICONV_OPEN
     35 #include <iconv.h>
     36#endif
    3337#include <libgen.h> /* basename() */
    3438#include <sys/time.h>
     
    11621166***/
    11631167
    1164 char*
    1165 tr_utf8clean( const char * str, int max_len, tr_bool * err )
     1168static char*
     1169strip_non_utf8( const char * in, size_t inlen )
    11661170{
    11671171    char * ret;
    11681172    const char * end;
     1173    const char zero = '\0';
     1174    struct evbuffer * buf = evbuffer_new( );
     1175 
     1176    while( !tr_utf8_validate( in, inlen, &end ) )
     1177    {
     1178        const int good_len = end - in;
     1179
     1180        evbuffer_add( buf, in, good_len );
     1181        inlen -= ( good_len + 1 );
     1182        in += ( good_len + 1 );
     1183        evbuffer_add( buf, "?", 1 );
     1184    }
     1185 
     1186    evbuffer_add( buf, in, inlen );
     1187    evbuffer_add( buf, &zero, 1 );
     1188    ret = tr_memdup( EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
     1189    evbuffer_free( buf );
     1190    return ret;
     1191}
     1192
     1193static char*
     1194to_utf8( const char * in, size_t inlen )
     1195{
     1196    char * ret = NULL;
     1197
     1198#ifdef HAVE_ICONV_OPEN
     1199    int i;
     1200    const char * encodings[] = { "CURRENT", "ISO-8859-15" };
     1201    const int encoding_count = sizeof(encodings) / sizeof(encodings[1]);
     1202    const size_t buflen = inlen*4 + 10;
     1203    char * out = tr_new( char, buflen );
     1204
     1205    for( i=0; !ret && i<encoding_count; ++i )
     1206    {
     1207        char * inbuf = (char*) in;
     1208        char * outbuf = out;
     1209        size_t inbytesleft = inlen;
     1210        size_t outbytesleft = buflen;
     1211        const char * test_encoding = encodings[i];
     1212
     1213        iconv_t cd = iconv_open( "UTF-8", test_encoding );
     1214        if( cd != (iconv_t)-1 ) {
     1215            if( iconv( cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft ) != (size_t)-1 )
     1216                ret = tr_strndup( out, buflen-outbytesleft );
     1217            iconv_close( cd );
     1218        }
     1219    }
     1220#endif
     1221
     1222    if( ret == NULL )
     1223        ret = strip_non_utf8( in, inlen );
     1224
     1225    return ret;
     1226}
     1227
     1228char*
     1229tr_utf8clean( const char * str, int max_len )
     1230{
     1231    char * ret;
     1232    const char * end;
    11691233
    11701234    if( max_len < 0 )
    11711235        max_len = (int) strlen( str );
    11721236
    1173     if( err != NULL )
    1174         *err = FALSE;
    1175 
    11761237    if( tr_utf8_validate( str, max_len, &end  ) )
    1177     {
    11781238        ret = tr_strndup( str, max_len );
    1179     }
    11801239    else
    1181     {
    1182         const char zero = '\0';
    1183         struct evbuffer * buf = evbuffer_new( );
    1184 
    1185         while( !tr_utf8_validate ( str, max_len, &end ) )
    1186         {
    1187             const int good_len = end - str;
    1188 
    1189             evbuffer_add( buf, str, good_len );
    1190             max_len -= ( good_len + 1 );
    1191             str += ( good_len + 1 );
    1192             evbuffer_add( buf, "?", 1 );
    1193 
    1194             if( err != NULL )
    1195                 *err = TRUE;
    1196         }
    1197 
    1198         evbuffer_add( buf, str, max_len );
    1199         evbuffer_add( buf, &zero, 1 );
    1200         ret = tr_memdup( EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
    1201         evbuffer_free( buf );
    1202     }
     1240        ret = to_utf8( str, max_len );
    12031241
    12041242    assert( tr_utf8_validate( ret, -1, NULL ) );
  • trunk/libtransmission/utils.h

    r10956 r10963  
    276276 * @param str the string to make a clean copy of
    277277 * @param len the length of the string to copy.  If -1, the entire string is used.
    278  * @param err if an error occurs and err is non-NULL, it's set to TRUE.
    279  */
    280 char* tr_utf8clean( const char * str, int len, tr_bool * err ) TR_GNUC_MALLOC;
     278 */
     279char* tr_utf8clean( const char * str, int len ) TR_GNUC_MALLOC;
    281280
    282281
Note: See TracChangeset for help on using the changeset viewer.