1 | /* |
---|
2 | * This file Copyright (C) 2008-2010 Mnemosyne LLC |
---|
3 | * |
---|
4 | * This file is licensed by the GPL version 2. Works owned by the |
---|
5 | * Transmission project are granted a special exemption to clause 2(b) |
---|
6 | * so that the bulk of its code can remain under the MIT license. |
---|
7 | * This exemption does not extend to derived works not owned by |
---|
8 | * the Transmission project. |
---|
9 | * |
---|
10 | * $Id: web.c 11398 2010-11-11 15:31:11Z charles $ |
---|
11 | */ |
---|
12 | |
---|
13 | #ifdef WIN32 |
---|
14 | #include <ws2tcpip.h> |
---|
15 | #else |
---|
16 | #include <sys/select.h> |
---|
17 | #endif |
---|
18 | |
---|
19 | #include <curl/curl.h> |
---|
20 | #include <event.h> |
---|
21 | |
---|
22 | #include "transmission.h" |
---|
23 | #include "list.h" |
---|
24 | #include "net.h" /* tr_address */ |
---|
25 | #include "platform.h" /* mutex */ |
---|
26 | #include "session.h" |
---|
27 | #include "trevent.h" /* tr_runInEventThread() */ |
---|
28 | #include "utils.h" |
---|
29 | #include "version.h" /* User-Agent */ |
---|
30 | #include "web.h" |
---|
31 | |
---|
/* CURLOPT_SOCKOPTFUNCTION / CURLOPT_SOCKOPTDATA were added in libcurl 7.15.6
 * (version number 0x070F06); only rely on them when building against that
 * release or a newer one. */
#if LIBCURL_VERSION_NUM >= 0x070F06
 #define USE_LIBCURL_SOCKOPT
#endif
---|
35 | |
---|
/* Longest time, in milliseconds, that the web thread may sleep in one pass
 * of its loop before it looks at its task queue and close_mode again. */
enum
{
    THREADFUNC_MAX_SLEEP_MSEC = 1000
};
---|
40 | |
---|
/*
 * dbgmsg( fmt, ... ): printf-style debug logging for this file.
 *
 * The first variant is compiled out; switch the "#if 0" to "#if 1" to send
 * every message, followed by a newline, straight to stderr.
 * The active variant forwards the message to tr_deepLog() under the "web"
 * name, and only when tr_deepLoggingIsActive() reports that deep logging
 * is enabled.
 */
#if 0
 #define dbgmsg( ... ) \
    do { \
        fprintf( stderr, __VA_ARGS__ ); \
        fprintf( stderr, "\n" ); \
    } while( 0 )
#else
 #define dbgmsg( ... ) \
    do { \
        if( tr_deepLoggingIsActive( ) ) \
            tr_deepLog( __FILE__, __LINE__, "web", __VA_ARGS__ ); \
    } while( 0 )
#endif
---|
54 | |
---|
55 | /*** |
---|
56 | **** |
---|
57 | ***/ |
---|
58 | |
---|
59 | struct tr_web |
---|
60 | { |
---|
61 | int close_mode; |
---|
62 | tr_list * tasks; |
---|
63 | tr_lock * taskLock; |
---|
64 | }; |
---|
65 | |
---|
66 | |
---|
67 | /*** |
---|
68 | **** |
---|
69 | ***/ |
---|
70 | |
---|
71 | struct tr_web_task |
---|
72 | { |
---|
73 | long code; |
---|
74 | struct evbuffer * response; |
---|
75 | char * url; |
---|
76 | char * range; |
---|
77 | tr_session * session; |
---|
78 | tr_web_done_func * done_func; |
---|
79 | void * done_func_user_data; |
---|
80 | }; |
---|
81 | |
---|
82 | static void |
---|
83 | task_free( struct tr_web_task * task ) |
---|
84 | { |
---|
85 | evbuffer_free( task->response ); |
---|
86 | tr_free( task->range ); |
---|
87 | tr_free( task->url ); |
---|
88 | tr_free( task ); |
---|
89 | } |
---|
90 | |
---|
91 | /*** |
---|
92 | **** |
---|
93 | ***/ |
---|
94 | |
---|
95 | static size_t |
---|
96 | writeFunc( void * ptr, size_t size, size_t nmemb, void * vtask ) |
---|
97 | { |
---|
98 | const size_t byteCount = size * nmemb; |
---|
99 | struct tr_web_task * task = vtask; |
---|
100 | evbuffer_add( task->response, ptr, byteCount ); |
---|
101 | dbgmsg( "wrote %zu bytes to task %p's buffer", byteCount, task ); |
---|
102 | return byteCount; |
---|
103 | } |
---|
104 | |
---|
#ifdef USE_LIBCURL_SOCKOPT
/*
 * Socket-option callback installed via CURLOPT_SOCKOPTFUNCTION.
 *
 * When the task's URL contains "scrape" or "announce", the socket's kernel
 * buffers are shrunk: 1024 bytes for sending, and 2048 (scrape) or 3072
 * (announce) bytes for receiving. The setsockopt() results are not checked.
 *
 * Always returns 0; a nonzero return would signal an error to the caller.
 */
static int
sockoptfunction( void * vtask, curl_socket_t fd, curlsocktype purpose UNUSED )
{
    const struct tr_web_task * task = vtask;
    const tr_bool is_scrape = strstr( task->url, "scrape" ) != NULL;

    /* announce and scrape requests have tiny payloads. */
    if( is_scrape || ( strstr( task->url, "announce" ) != NULL ) )
    {
        const int snd_size = 1024;
        const int rcv_size = is_scrape ? 2048 : 3072;

        setsockopt( fd, SOL_SOCKET, SO_SNDBUF, &snd_size, sizeof( snd_size ) );
        setsockopt( fd, SOL_SOCKET, SO_RCVBUF, &rcv_size, sizeof( rcv_size ) );
    }

    return 0;
}
#endif
---|
126 | |
---|
127 | static long |
---|
128 | getTimeoutFromURL( const struct tr_web_task * task ) |
---|
129 | { |
---|
130 | long timeout; |
---|
131 | const tr_session * session = task->session; |
---|
132 | |
---|
133 | if( !session || session->isClosed ) timeout = 20L; |
---|
134 | else if( strstr( task->url, "scrape" ) != NULL ) timeout = 30L; |
---|
135 | else if( strstr( task->url, "announce" ) != NULL ) timeout = 90L; |
---|
136 | else timeout = 240L; |
---|
137 | |
---|
138 | return timeout; |
---|
139 | } |
---|
140 | |
---|
141 | static CURL * |
---|
142 | createEasy( tr_session * s, struct tr_web_task * task ) |
---|
143 | { |
---|
144 | const tr_address * addr; |
---|
145 | CURL * e = curl_easy_init( ); |
---|
146 | const long verbose = getenv( "TR_CURL_VERBOSE" ) != NULL; |
---|
147 | char * cookie_filename = tr_buildPath( s->configDir, "cookies.txt", NULL ); |
---|
148 | |
---|
149 | curl_easy_setopt( e, CURLOPT_AUTOREFERER, 1L ); |
---|
150 | curl_easy_setopt( e, CURLOPT_COOKIEFILE, cookie_filename ); |
---|
151 | curl_easy_setopt( e, CURLOPT_ENCODING, "gzip;q=1.0, deflate, identity" ); |
---|
152 | curl_easy_setopt( e, CURLOPT_FOLLOWLOCATION, 1L ); |
---|
153 | curl_easy_setopt( e, CURLOPT_MAXREDIRS, -1L ); |
---|
154 | curl_easy_setopt( e, CURLOPT_NOSIGNAL, 1L ); |
---|
155 | curl_easy_setopt( e, CURLOPT_PRIVATE, task ); |
---|
156 | #ifdef USE_LIBCURL_SOCKOPT |
---|
157 | curl_easy_setopt( e, CURLOPT_SOCKOPTFUNCTION, sockoptfunction ); |
---|
158 | curl_easy_setopt( e, CURLOPT_SOCKOPTDATA, task ); |
---|
159 | #endif |
---|
160 | curl_easy_setopt( e, CURLOPT_SSL_VERIFYHOST, 0L ); |
---|
161 | curl_easy_setopt( e, CURLOPT_SSL_VERIFYPEER, 0L ); |
---|
162 | curl_easy_setopt( e, CURLOPT_TIMEOUT, getTimeoutFromURL( task ) ); |
---|
163 | curl_easy_setopt( e, CURLOPT_URL, task->url ); |
---|
164 | curl_easy_setopt( e, CURLOPT_USERAGENT, TR_NAME "/" SHORT_VERSION_STRING ); |
---|
165 | curl_easy_setopt( e, CURLOPT_VERBOSE, verbose ); |
---|
166 | curl_easy_setopt( e, CURLOPT_WRITEDATA, task ); |
---|
167 | curl_easy_setopt( e, CURLOPT_WRITEFUNCTION, writeFunc ); |
---|
168 | |
---|
169 | if(( addr = tr_sessionGetPublicAddress( s, TR_AF_INET ))) |
---|
170 | curl_easy_setopt( e, CURLOPT_INTERFACE, tr_ntop_non_ts( addr ) ); |
---|
171 | |
---|
172 | if( task->range ) |
---|
173 | curl_easy_setopt( e, CURLOPT_RANGE, task->range ); |
---|
174 | |
---|
175 | tr_free( cookie_filename ); |
---|
176 | return e; |
---|
177 | } |
---|
178 | |
---|
179 | /*** |
---|
180 | **** |
---|
181 | ***/ |
---|
182 | |
---|
183 | static void |
---|
184 | task_finish_func( void * vtask ) |
---|
185 | { |
---|
186 | struct tr_web_task * task = vtask; |
---|
187 | dbgmsg( "finished web task %p; got %ld", task, task->code ); |
---|
188 | |
---|
189 | if( task->done_func != NULL ) |
---|
190 | task->done_func( task->session, |
---|
191 | task->code, |
---|
192 | EVBUFFER_DATA( task->response ), |
---|
193 | EVBUFFER_LENGTH( task->response ), |
---|
194 | task->done_func_user_data ); |
---|
195 | |
---|
196 | task_free( task ); |
---|
197 | } |
---|
198 | |
---|
199 | /**** |
---|
200 | ***** |
---|
201 | ****/ |
---|
202 | |
---|
203 | void |
---|
204 | tr_webRun( tr_session * session, |
---|
205 | const char * url, |
---|
206 | const char * range, |
---|
207 | tr_web_done_func done_func, |
---|
208 | void * done_func_user_data ) |
---|
209 | { |
---|
210 | struct tr_web * web = session->web; |
---|
211 | |
---|
212 | if( web != NULL ) |
---|
213 | { |
---|
214 | struct tr_web_task * task = tr_new0( struct tr_web_task, 1 ); |
---|
215 | |
---|
216 | task->session = session; |
---|
217 | task->url = tr_strdup( url ); |
---|
218 | task->range = tr_strdup( range ); |
---|
219 | task->done_func = done_func; |
---|
220 | task->done_func_user_data = done_func_user_data; |
---|
221 | task->response = evbuffer_new( ); |
---|
222 | |
---|
223 | tr_lockLock( web->taskLock ); |
---|
224 | tr_list_append( &web->tasks, task ); |
---|
225 | tr_lockUnlock( web->taskLock ); |
---|
226 | } |
---|
227 | } |
---|
228 | |
---|
/**
 * Portability wrapper for select().
 *
 * http://msdn.microsoft.com/en-us/library/ms740141%28VS.85%29.aspx
 * On win32, any two of the parameters readfds, writefds, or exceptfds
 * can be given as null. At least one must be non-null, and any non-null
 * descriptor set must contain at least one handle to a socket.
 *
 * So on win32 empty sets are passed as NULL, and when all three sets are
 * empty the function just sleeps for the timeout instead of calling
 * select(). A failing select() is logged there; elsewhere select() is
 * called directly and its result is ignored.
 */
static void
tr_select( int nfds,
           fd_set * r_fd_set, fd_set * w_fd_set, fd_set * c_fd_set,
           struct timeval * t )
{
#ifdef WIN32
    fd_set * const r = r_fd_set->fd_count ? r_fd_set : NULL;
    fd_set * const w = w_fd_set->fd_count ? w_fd_set : NULL;
    fd_set * const c = c_fd_set->fd_count ? c_fd_set : NULL;

    if( ( r == NULL ) && ( w == NULL ) && ( c == NULL ) )
    {
        /* nothing to wait on: emulate the timeout with a plain sleep */
        const long int msec = t->tv_sec*1000 + t->tv_usec/1000;
        tr_wait_msec( msec );
    }
    else if( select( 0, r, w, c, t ) < 0 )
    {
        char errstr[512];
        const int e = EVUTIL_SOCKET_ERROR( );
        tr_net_strerror( errstr, sizeof( errstr ), e );
        dbgmsg( "Error: select (%d) %s", e, errstr );
    }
#else
    select( nfds, r_fd_set, w_fd_set, c_fd_set, t );
#endif
}
---|
261 | |
---|
262 | static void |
---|
263 | tr_webThreadFunc( void * vsession ) |
---|
264 | { |
---|
265 | int unused; |
---|
266 | CURLM * multi; |
---|
267 | struct tr_web * web; |
---|
268 | int taskCount = 0; |
---|
269 | tr_session * session = vsession; |
---|
270 | |
---|
271 | /* try to enable ssl for https support; but if that fails, |
---|
272 | * try a plain vanilla init */ |
---|
273 | if( curl_global_init( CURL_GLOBAL_SSL ) ) |
---|
274 | curl_global_init( 0 ); |
---|
275 | |
---|
276 | web = tr_new0( struct tr_web, 1 ); |
---|
277 | web->close_mode = ~0; |
---|
278 | web->taskLock = tr_lockNew( ); |
---|
279 | web->tasks = NULL; |
---|
280 | multi = curl_multi_init( ); |
---|
281 | session->web = web; |
---|
282 | |
---|
283 | for( ;; ) |
---|
284 | { |
---|
285 | long msec; |
---|
286 | CURLMsg * msg; |
---|
287 | CURLMcode mcode; |
---|
288 | struct tr_web_task * task; |
---|
289 | |
---|
290 | if( web->close_mode == TR_WEB_CLOSE_NOW ) |
---|
291 | break; |
---|
292 | if( ( web->close_mode == TR_WEB_CLOSE_WHEN_IDLE ) && !taskCount ) |
---|
293 | break; |
---|
294 | |
---|
295 | /* add tasks from the queue */ |
---|
296 | tr_lockLock( web->taskLock ); |
---|
297 | while(( task = tr_list_pop_front( &web->tasks ))) |
---|
298 | { |
---|
299 | dbgmsg( "adding task to curl: [%s]\n", task->url ); |
---|
300 | curl_multi_add_handle( multi, createEasy( session, task )); |
---|
301 | /*fprintf( stderr, "adding a task.. taskCount is now %d\n", taskCount );*/ |
---|
302 | ++taskCount; |
---|
303 | } |
---|
304 | tr_lockUnlock( web->taskLock ); |
---|
305 | |
---|
306 | /* maybe wait a little while before calling curl_multi_perform() */ |
---|
307 | msec = 0; |
---|
308 | curl_multi_timeout( multi, &msec ); |
---|
309 | if( msec < 0 ) |
---|
310 | msec = THREADFUNC_MAX_SLEEP_MSEC; |
---|
311 | if( msec > 0 ) |
---|
312 | { |
---|
313 | int usec; |
---|
314 | int max_fd; |
---|
315 | struct timeval t; |
---|
316 | fd_set r_fd_set, w_fd_set, c_fd_set; |
---|
317 | |
---|
318 | max_fd = 0; |
---|
319 | FD_ZERO( &r_fd_set ); |
---|
320 | FD_ZERO( &w_fd_set ); |
---|
321 | FD_ZERO( &c_fd_set ); |
---|
322 | curl_multi_fdset( multi, &r_fd_set, &w_fd_set, &c_fd_set, &max_fd ); |
---|
323 | |
---|
324 | if( msec > THREADFUNC_MAX_SLEEP_MSEC ) |
---|
325 | msec = THREADFUNC_MAX_SLEEP_MSEC; |
---|
326 | |
---|
327 | usec = msec * 1000; |
---|
328 | t.tv_sec = usec / 1000000; |
---|
329 | t.tv_usec = usec % 1000000; |
---|
330 | |
---|
331 | tr_select( max_fd+1, &r_fd_set, &w_fd_set, &c_fd_set, &t ); |
---|
332 | } |
---|
333 | |
---|
334 | /* call curl_multi_perform() */ |
---|
335 | do { |
---|
336 | mcode = curl_multi_perform( multi, &unused ); |
---|
337 | } while( mcode == CURLM_CALL_MULTI_PERFORM ); |
---|
338 | |
---|
339 | /* pump completed tasks from the multi */ |
---|
340 | while(( msg = curl_multi_info_read( multi, &unused ))) |
---|
341 | { |
---|
342 | if(( msg->msg == CURLMSG_DONE ) && ( msg->easy_handle != NULL )) |
---|
343 | { |
---|
344 | struct tr_web_task * task; |
---|
345 | CURL * e = msg->easy_handle; |
---|
346 | curl_easy_getinfo( e, CURLINFO_PRIVATE, (void*)&task ); |
---|
347 | curl_easy_getinfo( e, CURLINFO_RESPONSE_CODE, &task->code ); |
---|
348 | curl_multi_remove_handle( multi, e ); |
---|
349 | curl_easy_cleanup( e ); |
---|
350 | /*fprintf( stderr, "removing a completed task.. taskCount is now %d (response code: %d, response len: %d)\n", taskCount, (int)task->code, (int)EVBUFFER_LENGTH(task->response) );*/ |
---|
351 | tr_runInEventThread( task->session, task_finish_func, task ); |
---|
352 | --taskCount; |
---|
353 | } |
---|
354 | } |
---|
355 | } |
---|
356 | |
---|
357 | /* cleanup */ |
---|
358 | curl_multi_cleanup( multi ); |
---|
359 | tr_lockFree( web->taskLock ); |
---|
360 | tr_free( web ); |
---|
361 | session->web = NULL; |
---|
362 | } |
---|
363 | |
---|
364 | void |
---|
365 | tr_webInit( tr_session * session ) |
---|
366 | { |
---|
367 | tr_threadNew( tr_webThreadFunc, session ); |
---|
368 | } |
---|
369 | |
---|
370 | void |
---|
371 | tr_webClose( tr_session * session, tr_web_close_mode close_mode ) |
---|
372 | { |
---|
373 | if( session->web != NULL ) |
---|
374 | { |
---|
375 | session->web->close_mode = close_mode; |
---|
376 | |
---|
377 | if( close_mode == TR_WEB_CLOSE_NOW ) |
---|
378 | while( session->web != NULL ) |
---|
379 | tr_wait_msec( 100 ); |
---|
380 | } |
---|
381 | } |
---|
382 | |
---|
383 | /***** |
---|
384 | ****** |
---|
385 | ****** |
---|
386 | *****/ |
---|
387 | |
---|
/*
 * Map an HTTP response code to its standard reason phrase.
 *
 * @param code  HTTP status code; 0 stands for "no response was received"
 * @return a pointer to a static string -- never NULL, and not to be freed.
 *         Codes that are not listed yield "Unknown Error".
 */
const char *
tr_webGetResponseStr( long code )
{
    switch( code )
    {
        case   0: return "No Response";

        /* 1xx: informational */
        case 100: return "Continue";
        case 101: return "Switching Protocols";

        /* 2xx: success */
        case 200: return "OK";
        case 201: return "Created";
        case 202: return "Accepted";
        case 203: return "Non-Authoritative Information";
        case 204: return "No Content";
        case 205: return "Reset Content";
        case 206: return "Partial Content";

        /* 3xx: redirection */
        case 300: return "Multiple Choices";
        case 301: return "Moved Permanently";
        case 302: return "Found";
        case 303: return "See Other";
        case 304: return "Not Modified";
        case 305: return "Use Proxy";
        case 306: return "(Unused)";
        case 307: return "Temporary Redirect";

        /* 4xx: client error */
        case 400: return "Bad Request";
        case 401: return "Unauthorized";
        case 402: return "Payment Required";
        case 403: return "Forbidden";
        case 404: return "Not Found";
        case 405: return "Method Not Allowed";
        case 406: return "Not Acceptable";
        case 407: return "Proxy Authentication Required";
        case 408: return "Request Timeout";
        case 409: return "Conflict";
        case 410: return "Gone";
        case 411: return "Length Required";
        case 412: return "Precondition Failed";
        case 413: return "Request Entity Too Large";
        case 414: return "Request-URI Too Long";
        case 415: return "Unsupported Media Type";
        case 416: return "Requested Range Not Satisfiable";
        case 417: return "Expectation Failed";

        /* 5xx: server error */
        case 500: return "Internal Server Error";
        case 501: return "Not Implemented";
        case 502: return "Bad Gateway";
        case 503: return "Service Unavailable";
        case 504: return "Gateway Timeout";
        case 505: return "HTTP Version Not Supported";

        default:  return "Unknown Error";
    }
}
---|
437 | |
---|
438 | void |
---|
439 | tr_http_escape( struct evbuffer * out, |
---|
440 | const char * str, int len, tr_bool escape_slashes ) |
---|
441 | { |
---|
442 | const char * end; |
---|
443 | |
---|
444 | if( ( len < 0 ) && ( str != NULL ) ) |
---|
445 | len = strlen( str ); |
---|
446 | |
---|
447 | for( end=str+len; str && str!=end; ++str ) { |
---|
448 | if( ( *str == ',' ) |
---|
449 | || ( *str == '-' ) |
---|
450 | || ( *str == '.' ) |
---|
451 | || ( ( '0' <= *str ) && ( *str <= '9' ) ) |
---|
452 | || ( ( 'A' <= *str ) && ( *str <= 'Z' ) ) |
---|
453 | || ( ( 'a' <= *str ) && ( *str <= 'z' ) ) |
---|
454 | || ( ( *str == '/' ) && ( !escape_slashes ) ) ) |
---|
455 | evbuffer_add( out, str, 1 ); |
---|
456 | else |
---|
457 | evbuffer_add_printf( out, "%%%02X", (unsigned)(*str&0xFF) ); |
---|
458 | } |
---|
459 | } |
---|
460 | |
---|
/*
 * Decode a percent-encoded string.
 *
 * The decoding is done by curl_unescape(); its result is duplicated with
 * tr_strdup() and the curl-allocated original is released with curl_free().
 * Presumably this is so that callers can release the result with tr_free()
 * -- confirm against the callers.
 *
 * @param str  encoded text
 * @param len  passed through unchanged to curl_unescape()
 * @return the tr_strdup() copy of the decoded text
 */
char *
tr_http_unescape( const char * str, int len )
{
    char * decoded = curl_unescape( str, len );
    char * copy = tr_strdup( decoded );

    curl_free( decoded );

    return copy;
}
---|