| |
| /* Copyright 1998 by the Massachusetts Institute of Technology. |
| * Copyright (C) 2004-2010 by Daniel Stenberg |
| * |
| * Permission to use, copy, modify, and distribute this |
| * software and its documentation for any purpose and without |
| * fee is hereby granted, provided that the above copyright |
| * notice appear in all copies and that both that copyright |
| * notice and this permission notice appear in supporting |
| * documentation, and that the name of M.I.T. not be used in |
| * advertising or publicity pertaining to distribution of the |
| * software without specific, written prior permission. |
| * M.I.T. makes no representations about the suitability of |
| * this software for any purpose. It is provided "as is" |
| * without express or implied warranty. |
| */ |
| |
| #include "ares_setup.h" |
| |
| #ifdef HAVE_SYS_SOCKET_H |
| # include <sys/socket.h> |
| #endif |
| #ifdef HAVE_SYS_UIO_H |
| # include <sys/uio.h> |
| #endif |
| #ifdef HAVE_NETINET_IN_H |
| # include <netinet/in.h> |
| #endif |
| #ifdef HAVE_NETINET_TCP_H |
| # include <netinet/tcp.h> |
| #endif |
| #ifdef HAVE_NETDB_H |
| # include <netdb.h> |
| #endif |
| #ifdef HAVE_ARPA_NAMESER_H |
| # include <arpa/nameser.h> |
| #else |
| # include "nameser.h" |
| #endif |
| #ifdef HAVE_ARPA_NAMESER_COMPAT_H |
| # include <arpa/nameser_compat.h> |
| #endif |
| |
| #ifdef HAVE_SYS_TIME_H |
| # include <sys/time.h> |
| #endif |
| |
| #ifdef HAVE_STRINGS_H |
| # include <strings.h> |
| #endif |
| #ifdef HAVE_UNISTD_H |
| # include <unistd.h> |
| #endif |
| #ifdef HAVE_SYS_IOCTL_H |
| # include <sys/ioctl.h> |
| #endif |
| #ifdef NETWARE |
| # include <sys/filio.h> |
| #endif |
| |
| #include <assert.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <fcntl.h> |
| #include <time.h> |
| |
| #include "ares.h" |
| #include "ares_dns.h" |
| #include "ares_nowarn.h" |
| #include "ares_private.h" |
| |
| |
| static int try_again(int errnum); |
| static void write_tcp_data(ares_channel channel, fd_set *write_fds, |
| ares_socket_t write_fd, struct timeval *now); |
| static void read_tcp_data(ares_channel channel, fd_set *read_fds, |
| ares_socket_t read_fd, struct timeval *now); |
| static void read_udp_packets(ares_channel channel, fd_set *read_fds, |
| ares_socket_t read_fd, struct timeval *now); |
| static void advance_tcp_send_queue(ares_channel channel, int whichserver, |
| ssize_t num_bytes); |
| static void process_timeouts(ares_channel channel, struct timeval *now); |
| static void process_broken_connections(ares_channel channel, |
| struct timeval *now); |
| static void process_answer(ares_channel channel, unsigned char *abuf, |
| int alen, int whichserver, int tcp, |
| struct timeval *now); |
| static void handle_error(ares_channel channel, int whichserver, |
| struct timeval *now); |
| static void skip_server(ares_channel channel, struct query *query, |
| int whichserver); |
| static void next_server(ares_channel channel, struct query *query, |
| struct timeval *now); |
| static int open_tcp_socket(ares_channel channel, struct server_state *server); |
| static int open_udp_socket(ares_channel channel, struct server_state *server); |
| static int same_questions(const unsigned char *qbuf, int qlen, |
| const unsigned char *abuf, int alen); |
| static int same_address(struct sockaddr *sa, struct ares_addr *aa); |
| static void end_query(ares_channel channel, struct query *query, int status, |
| unsigned char *abuf, int alen); |
| |
/* Tell whether the instant 'now' has reached or passed 'check'.
 *
 * Returns 1 when now >= check and 0 when now is still earlier.
 */
int ares__timedout(struct timeval *now,
                   struct timeval *check)
{
  long sec_delta = (now->tv_sec - check->tv_sec);

  /* A difference in whole seconds settles the question on its own. */
  if(sec_delta != 0)
    return (sec_delta > 0) ? 1 : 0;

  /* Same second: the microsecond fields decide. */
  return (now->tv_usec - check->tv_usec >= 0);
}
| |
/* Add 'millisecs' milliseconds to the time stored in 'now', in place.
 *
 * The result is renormalized so that tv_usec stays within [0, 1000000),
 * assuming it was in that range on entry.  A negative 'millisecs' is
 * accepted and moves the time backwards.
 *
 * Always returns 0.
 */
int ares__timeadd(struct timeval *now,
                  int millisecs)
{
  /* In C both '/' and '%' truncate toward zero, so for a negative argument
   * both parts below are negative (or zero). */
  now->tv_sec += millisecs/1000;
  now->tv_usec += (millisecs%1000)*1000;

  if(now->tv_usec >= 1000000) {
    /* carry into the seconds field */
    ++(now->tv_sec);
    now->tv_usec -= 1000000;
  }
  else if(now->tv_usec < 0) {
    /* borrow from the seconds field (only reachable when millisecs < 0) */
    --(now->tv_sec);
    now->tv_usec += 1000000;
  }

  return 0;
}
| |
/* Compute how far 'check' lies ahead of 'now', expressed in milliseconds.
 *
 * The seconds difference is scaled up to milliseconds and the microseconds
 * difference is scaled down (integer division, truncating toward zero).
 * The result is negative when 'check' is earlier than 'now'.
 */
long ares__timeoffset(struct timeval *now,
                      struct timeval *check)
{
  return (check->tv_usec - now->tv_usec)/1000 +
         (check->tv_sec - now->tv_sec)*1000;
}
| |
| |
| /* |
| * generic process function |
| */ |
| static void processfds(ares_channel channel, |
| fd_set *read_fds, ares_socket_t read_fd, |
| fd_set *write_fds, ares_socket_t write_fd) |
| { |
| struct timeval now = ares__tvnow(); |
| |
| write_tcp_data(channel, write_fds, write_fd, &now); |
| read_tcp_data(channel, read_fds, read_fd, &now); |
| read_udp_packets(channel, read_fds, read_fd, &now); |
| process_timeouts(channel, &now); |
| process_broken_connections(channel, &now); |
| } |
| |
| /* Something interesting happened on the wire, or there was a timeout. |
| * See what's up and respond accordingly. |
| */ |
| void ares_process(ares_channel channel, fd_set *read_fds, fd_set *write_fds) |
| { |
| processfds(channel, read_fds, ARES_SOCKET_BAD, write_fds, ARES_SOCKET_BAD); |
| } |
| |
| /* Something interesting happened on the wire, or there was a timeout. |
| * See what's up and respond accordingly. |
| */ |
| void ares_process_fd(ares_channel channel, |
| ares_socket_t read_fd, /* use ARES_SOCKET_BAD or valid |
| file descriptors */ |
| ares_socket_t write_fd) |
| { |
| processfds(channel, NULL, read_fd, NULL, write_fd); |
| } |
| |
| |
/* Classify a socket error number: returns 1 when it only means "not ready
 * yet, retry later" (EWOULDBLOCK or EAGAIN) and 0 for every other value.
 *
 * Both constants are tested because some systems, HP-UX in particular, may
 * report either one.  See this man page
 *
 * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?
 *     manpage=/usr/share/man/man2.Z/send.2
 */
static int try_again(int errnum)
{
#if !defined EWOULDBLOCK && !defined EAGAIN
#error "Neither EWOULDBLOCK nor EAGAIN defined"
#endif

#ifdef EWOULDBLOCK
  if (errnum == EWOULDBLOCK)
    return 1;
#endif
#if defined EAGAIN && EAGAIN != EWOULDBLOCK
  /* only a separate test when the two constants really differ */
  if (errnum == EAGAIN)
    return 1;
#endif

  return 0;
}
| |
| /* If any TCP sockets select true for writing, write out queued data |
| * we have for them. |
| */ |
| static void write_tcp_data(ares_channel channel, |
| fd_set *write_fds, |
| ares_socket_t write_fd, |
| struct timeval *now) |
| { |
| struct server_state *server; |
| struct send_request *sendreq; |
| struct iovec *vec; |
| int i; |
| ssize_t scount; |
| ssize_t wcount; |
| size_t n; |
| |
| if(!write_fds && (write_fd == ARES_SOCKET_BAD)) |
| /* no possible action */ |
| return; |
| |
| for (i = 0; i < channel->nservers; i++) |
| { |
| /* Make sure server has data to send and is selected in write_fds or |
| write_fd. */ |
| server = &channel->servers[i]; |
| if (!server->qhead || server->tcp_socket == ARES_SOCKET_BAD || |
| server->is_broken) |
| continue; |
| |
| if(write_fds) { |
| if(!FD_ISSET(server->tcp_socket, write_fds)) |
| continue; |
| } |
| else { |
| if(server->tcp_socket != write_fd) |
| continue; |
| } |
| |
| if(write_fds) |
| /* If there's an error and we close this socket, then open |
| * another with the same fd to talk to another server, then we |
| * don't want to think that it was the new socket that was |
| * ready. This is not disastrous, but is likely to result in |
| * extra system calls and confusion. */ |
| FD_CLR(server->tcp_socket, write_fds); |
| |
| /* Count the number of send queue items. */ |
| n = 0; |
| for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) |
| n++; |
| |
| /* Allocate iovecs so we can send all our data at once. */ |
| vec = malloc(n * sizeof(struct iovec)); |
| if (vec) |
| { |
| /* Fill in the iovecs and send. */ |
| n = 0; |
| for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) |
| { |
| vec[n].iov_base = (char *) sendreq->data; |
| vec[n].iov_len = sendreq->len; |
| n++; |
| } |
| wcount = (ssize_t)writev(server->tcp_socket, vec, (int)n); |
| free(vec); |
| if (wcount < 0) |
| { |
| if (!try_again(SOCKERRNO)) |
| handle_error(channel, i, now); |
| continue; |
| } |
| |
| /* Advance the send queue by as many bytes as we sent. */ |
| advance_tcp_send_queue(channel, i, wcount); |
| } |
| else |
| { |
| /* Can't allocate iovecs; just send the first request. */ |
| sendreq = server->qhead; |
| |
| scount = swrite(server->tcp_socket, sendreq->data, sendreq->len); |
| if (scount < 0) |
| { |
| if (!try_again(SOCKERRNO)) |
| handle_error(channel, i, now); |
| continue; |
| } |
| |
| /* Advance the send queue by as many bytes as we sent. */ |
| advance_tcp_send_queue(channel, i, scount); |
| } |
| } |
| } |
| |
| /* Consume the given number of bytes from the head of the TCP send queue. */ |
| static void advance_tcp_send_queue(ares_channel channel, int whichserver, |
| ssize_t num_bytes) |
| { |
| struct send_request *sendreq; |
| struct server_state *server = &channel->servers[whichserver]; |
| while (num_bytes > 0) { |
| sendreq = server->qhead; |
| if ((size_t)num_bytes >= sendreq->len) { |
| num_bytes -= sendreq->len; |
| server->qhead = sendreq->next; |
| if (sendreq->data_storage) |
| free(sendreq->data_storage); |
| free(sendreq); |
| if (server->qhead == NULL) { |
| SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 0); |
| server->qtail = NULL; |
| |
| /* qhead is NULL so we cannot continue this loop */ |
| break; |
| } |
| } |
| else { |
| sendreq->data += num_bytes; |
| sendreq->len -= num_bytes; |
| num_bytes = 0; |
| } |
| } |
| } |
| |
| /* If any TCP socket selects true for reading, read some data, |
| * allocate a buffer if we finish reading the length word, and process |
| * a packet if we finish reading one. |
| */ |
| static void read_tcp_data(ares_channel channel, fd_set *read_fds, |
| ares_socket_t read_fd, struct timeval *now) |
| { |
| struct server_state *server; |
| int i; |
| ssize_t count; |
| |
| if(!read_fds && (read_fd == ARES_SOCKET_BAD)) |
| /* no possible action */ |
| return; |
| |
| for (i = 0; i < channel->nservers; i++) |
| { |
| /* Make sure the server has a socket and is selected in read_fds. */ |
| server = &channel->servers[i]; |
| if (server->tcp_socket == ARES_SOCKET_BAD || server->is_broken) |
| continue; |
| |
| if(read_fds) { |
| if(!FD_ISSET(server->tcp_socket, read_fds)) |
| continue; |
| } |
| else { |
| if(server->tcp_socket != read_fd) |
| continue; |
| } |
| |
| if(read_fds) |
| /* If there's an error and we close this socket, then open |
| * another with the same fd to talk to another server, then we |
| * don't want to think that it was the new socket that was |
| * ready. This is not disastrous, but is likely to result in |
| * extra system calls and confusion. */ |
| FD_CLR(server->tcp_socket, read_fds); |
| |
| if (server->tcp_lenbuf_pos != 2) |
| { |
| /* We haven't yet read a length word, so read that (or |
| * what's left to read of it). |
| */ |
| count = sread(server->tcp_socket, |
| server->tcp_lenbuf + server->tcp_lenbuf_pos, |
| 2 - server->tcp_lenbuf_pos); |
| if (count <= 0) |
| { |
| if (!(count == -1 && try_again(SOCKERRNO))) |
| handle_error(channel, i, now); |
| continue; |
| } |
| |
| server->tcp_lenbuf_pos += (int)count; |
| if (server->tcp_lenbuf_pos == 2) |
| { |
| /* We finished reading the length word. Decode the |
| * length and allocate a buffer for the data. |
| */ |
| server->tcp_length = server->tcp_lenbuf[0] << 8 |
| | server->tcp_lenbuf[1]; |
| server->tcp_buffer = malloc(server->tcp_length); |
| if (!server->tcp_buffer) |
| handle_error(channel, i, now); |
| server->tcp_buffer_pos = 0; |
| } |
| } |
| else |
| { |
| /* Read data into the allocated buffer. */ |
| count = sread(server->tcp_socket, |
| server->tcp_buffer + server->tcp_buffer_pos, |
| server->tcp_length - server->tcp_buffer_pos); |
| if (count <= 0) |
| { |
| if (!(count == -1 && try_again(SOCKERRNO))) |
| handle_error(channel, i, now); |
| continue; |
| } |
| |
| server->tcp_buffer_pos += (int)count; |
| if (server->tcp_buffer_pos == server->tcp_length) |
| { |
| /* We finished reading this answer; process it and |
| * prepare to read another length word. |
| */ |
| process_answer(channel, server->tcp_buffer, server->tcp_length, |
| i, 1, now); |
| if (server->tcp_buffer) |
| free(server->tcp_buffer); |
| server->tcp_buffer = NULL; |
| server->tcp_lenbuf_pos = 0; |
| server->tcp_buffer_pos = 0; |
| } |
| } |
| } |
| } |
| |
| /* If any UDP sockets select true for reading, process them. */ |
| static void read_udp_packets(ares_channel channel, fd_set *read_fds, |
| ares_socket_t read_fd, struct timeval *now) |
| { |
| struct server_state *server; |
| int i; |
| ssize_t count; |
| unsigned char buf[PACKETSZ + 1]; |
| #ifdef HAVE_RECVFROM |
| ares_socklen_t fromlen; |
| union { |
| struct sockaddr sa; |
| struct sockaddr_in sa4; |
| struct sockaddr_in6 sa6; |
| } from; |
| #endif |
| |
| if(!read_fds && (read_fd == ARES_SOCKET_BAD)) |
| /* no possible action */ |
| return; |
| |
| for (i = 0; i < channel->nservers; i++) |
| { |
| /* Make sure the server has a socket and is selected in read_fds. */ |
| server = &channel->servers[i]; |
| |
| if (server->udp_socket == ARES_SOCKET_BAD || server->is_broken) |
| continue; |
| |
| if(read_fds) { |
| if(!FD_ISSET(server->udp_socket, read_fds)) |
| continue; |
| } |
| else { |
| if(server->udp_socket != read_fd) |
| continue; |
| } |
| |
| if(read_fds) |
| /* If there's an error and we close this socket, then open |
| * another with the same fd to talk to another server, then we |
| * don't want to think that it was the new socket that was |
| * ready. This is not disastrous, but is likely to result in |
| * extra system calls and confusion. */ |
| FD_CLR(server->udp_socket, read_fds); |
| |
| /* To reduce event loop overhead, read and process as many |
| * packets as we can. */ |
| do { |
| #ifdef HAVE_RECVFROM |
| if (server->addr.family == AF_INET) |
| fromlen = sizeof(from.sa4); |
| else |
| fromlen = sizeof(from.sa6); |
| count = (ssize_t)recvfrom(server->udp_socket, (void *)buf, sizeof(buf), |
| 0, &from.sa, &fromlen); |
| #else |
| count = sread(server->udp_socket, buf, sizeof(buf)); |
| #endif |
| if (count == -1 && try_again(SOCKERRNO)) |
| continue; |
| else if (count <= 0) |
| handle_error(channel, i, now); |
| #ifdef HAVE_RECVFROM |
| else if (!same_address(&from.sa, &server->addr)) |
| /* The address the response comes from does not match |
| * the address we sent the request to. Someone may be |
| * attempting to perform a cache poisoning attack. */ |
| break; |
| #endif |
| else |
| process_answer(channel, buf, (int)count, i, 0, now); |
| } while (count > 0); |
| } |
| } |
| |
| /* If any queries have timed out, note the timeout and move them on. */ |
| static void process_timeouts(ares_channel channel, struct timeval *now) |
| { |
| time_t t; /* the time of the timeouts we're processing */ |
| struct query *query; |
| struct list_node* list_head; |
| struct list_node* list_node; |
| |
| /* Process all the timeouts that have fired since the last time we |
| * processed timeouts. If things are going well, then we'll have |
| * hundreds/thousands of queries that fall into future buckets, and |
| * only a handful of requests that fall into the "now" bucket, so |
| * this should be quite quick. |
| */ |
| for (t = channel->last_timeout_processed; t <= now->tv_sec; t++) |
| { |
| list_head = &(channel->queries_by_timeout[t % ARES_TIMEOUT_TABLE_SIZE]); |
| for (list_node = list_head->next; list_node != list_head; ) |
| { |
| query = list_node->data; |
| list_node = list_node->next; /* in case the query gets deleted */ |
| if (query->timeout.tv_sec && ares__timedout(now, &query->timeout)) |
| { |
| query->error_status = ARES_ETIMEOUT; |
| ++query->timeouts; |
| next_server(channel, query, now); |
| } |
| } |
| } |
| channel->last_timeout_processed = now->tv_sec; |
| } |
| |
| /* Handle an answer from a server. */ |
| static void process_answer(ares_channel channel, unsigned char *abuf, |
| int alen, int whichserver, int tcp, |
| struct timeval *now) |
| { |
| int tc, rcode; |
| unsigned short id; |
| struct query *query; |
| struct list_node* list_head; |
| struct list_node* list_node; |
| |
| /* If there's no room in the answer for a header, we can't do much |
| * with it. */ |
| if (alen < HFIXEDSZ) |
| return; |
| |
| /* Grab the query ID, truncate bit, and response code from the packet. */ |
| id = DNS_HEADER_QID(abuf); |
| tc = DNS_HEADER_TC(abuf); |
| rcode = DNS_HEADER_RCODE(abuf); |
| |
| /* Find the query corresponding to this packet. The queries are |
| * hashed/bucketed by query id, so this lookup should be quick. |
| * Note that both the query id and the questions must be the same; |
| * when the query id wraps around we can have multiple outstanding |
| * queries with the same query id, so we need to check both the id and |
| * question. |
| */ |
| query = NULL; |
| list_head = &(channel->queries_by_qid[id % ARES_QID_TABLE_SIZE]); |
| for (list_node = list_head->next; list_node != list_head; |
| list_node = list_node->next) |
| { |
| struct query *q = list_node->data; |
| if ((q->qid == id) && same_questions(q->qbuf, q->qlen, abuf, alen)) |
| { |
| query = q; |
| break; |
| } |
| } |
| if (!query) |
| return; |
| |
| /* If we got a truncated UDP packet and are not ignoring truncation, |
| * don't accept the packet, and switch the query to TCP if we hadn't |
| * done so already. |
| */ |
| if ((tc || alen > PACKETSZ) && !tcp && !(channel->flags & ARES_FLAG_IGNTC)) |
| { |
| if (!query->using_tcp) |
| { |
| query->using_tcp = 1; |
| ares__send_query(channel, query, now); |
| } |
| return; |
| } |
| |
| /* Limit alen to PACKETSZ if we aren't using TCP (only relevant if we |
| * are ignoring truncation. |
| */ |
| if (alen > PACKETSZ && !tcp) |
| alen = PACKETSZ; |
| |
| /* If we aren't passing through all error packets, discard packets |
| * with SERVFAIL, NOTIMP, or REFUSED response codes. |
| */ |
| if (!(channel->flags & ARES_FLAG_NOCHECKRESP)) |
| { |
| if (rcode == SERVFAIL || rcode == NOTIMP || rcode == REFUSED) |
| { |
| skip_server(channel, query, whichserver); |
| if (query->server == whichserver) |
| next_server(channel, query, now); |
| return; |
| } |
| } |
| |
| end_query(channel, query, ARES_SUCCESS, abuf, alen); |
| } |
| |
| /* Close all the connections that are no longer usable. */ |
| static void process_broken_connections(ares_channel channel, |
| struct timeval *now) |
| { |
| int i; |
| for (i = 0; i < channel->nservers; i++) |
| { |
| struct server_state *server = &channel->servers[i]; |
| if (server->is_broken) |
| { |
| handle_error(channel, i, now); |
| } |
| } |
| } |
| |
| static void handle_error(ares_channel channel, int whichserver, |
| struct timeval *now) |
| { |
| struct server_state *server; |
| struct query *query; |
| struct list_node list_head; |
| struct list_node* list_node; |
| |
| server = &channel->servers[whichserver]; |
| |
| /* Reset communications with this server. */ |
| ares__close_sockets(channel, server); |
| |
| /* Tell all queries talking to this server to move on and not try |
| * this server again. We steal the current list of queries that were |
| * in-flight to this server, since when we call next_server this can |
| * cause the queries to be re-sent to this server, which will |
| * re-insert these queries in that same server->queries_to_server |
| * list. |
| */ |
| ares__init_list_head(&list_head); |
| ares__swap_lists(&list_head, &(server->queries_to_server)); |
| for (list_node = list_head.next; list_node != &list_head; ) |
| { |
| query = list_node->data; |
| list_node = list_node->next; /* in case the query gets deleted */ |
| assert(query->server == whichserver); |
| skip_server(channel, query, whichserver); |
| next_server(channel, query, now); |
| } |
| /* Each query should have removed itself from our temporary list as |
| * it re-sent itself or finished up... |
| */ |
| assert(ares__is_list_empty(&list_head)); |
| } |
| |
| static void skip_server(ares_channel channel, struct query *query, |
| int whichserver) { |
| /* The given server gave us problems with this query, so if we have |
| * the luxury of using other servers, then let's skip the |
| * potentially broken server and just use the others. If we only |
| * have one server and we need to retry then we should just go ahead |
| * and re-use that server, since it's our only hope; perhaps we |
| * just got unlucky, and retrying will work (eg, the server timed |
| * out our TCP connection just as we were sending another request). |
| */ |
| if (channel->nservers > 1) |
| { |
| query->server_info[whichserver].skip_server = 1; |
| } |
| } |
| |
| static void next_server(ares_channel channel, struct query *query, |
| struct timeval *now) |
| { |
| /* We need to try each server channel->tries times. We have channel->nservers |
| * servers to try. In total, we need to do channel->nservers * channel->tries |
| * attempts. Use query->try to remember how many times we already attempted |
| * this query. Use modular arithmetic to find the next server to try. */ |
| while (++(query->try_count) < (channel->nservers * channel->tries)) |
| { |
| struct server_state *server; |
| |
| /* Move on to the next server. */ |
| query->server = (query->server + 1) % channel->nservers; |
| server = &channel->servers[query->server]; |
| |
| /* We don't want to use this server if (1) we decided this |
| * connection is broken, and thus about to be closed, (2) |
| * we've decided to skip this server because of earlier |
| * errors we encountered, or (3) we already sent this query |
| * over this exact connection. |
| */ |
| if (!server->is_broken && |
| !query->server_info[query->server].skip_server && |
| !(query->using_tcp && |
| (query->server_info[query->server].tcp_connection_generation == |
| server->tcp_connection_generation))) |
| { |
| ares__send_query(channel, query, now); |
| return; |
| } |
| |
| /* You might think that with TCP we only need one try. However, |
| * even when using TCP, servers can time-out our connection just |
| * as we're sending a request, or close our connection because |
| * they die, or never send us a reply because they get wedged or |
| * tickle a bug that drops our request. |
| */ |
| } |
| |
| /* If we are here, all attempts to perform query failed. */ |
| end_query(channel, query, query->error_status, NULL, 0); |
| } |
| |
| void ares__send_query(ares_channel channel, struct query *query, |
| struct timeval *now) |
| { |
| struct send_request *sendreq; |
| struct server_state *server; |
| int timeplus; |
| |
| server = &channel->servers[query->server]; |
| if (query->using_tcp) |
| { |
| /* Make sure the TCP socket for this server is set up and queue |
| * a send request. |
| */ |
| if (server->tcp_socket == ARES_SOCKET_BAD) |
| { |
| if (open_tcp_socket(channel, server) == -1) |
| { |
| skip_server(channel, query, query->server); |
| next_server(channel, query, now); |
| return; |
| } |
| } |
| sendreq = calloc(1, sizeof(struct send_request)); |
| if (!sendreq) |
| { |
| end_query(channel, query, ARES_ENOMEM, NULL, 0); |
| return; |
| } |
| /* To make the common case fast, we avoid copies by using the |
| * query's tcpbuf for as long as the query is alive. In the rare |
| * case where the query ends while it's queued for transmission, |
| * then we give the sendreq its own copy of the request packet |
| * and put it in sendreq->data_storage. |
| */ |
| sendreq->data_storage = NULL; |
| sendreq->data = query->tcpbuf; |
| sendreq->len = query->tcplen; |
| sendreq->owner_query = query; |
| sendreq->next = NULL; |
| if (server->qtail) |
| server->qtail->next = sendreq; |
| else |
| { |
| SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 1); |
| server->qhead = sendreq; |
| } |
| server->qtail = sendreq; |
| query->server_info[query->server].tcp_connection_generation = |
| server->tcp_connection_generation; |
| } |
| else |
| { |
| if (server->udp_socket == ARES_SOCKET_BAD) |
| { |
| if (open_udp_socket(channel, server) == -1) |
| { |
| skip_server(channel, query, query->server); |
| next_server(channel, query, now); |
| return; |
| } |
| } |
| if (swrite(server->udp_socket, query->qbuf, query->qlen) == -1) |
| { |
| /* FIXME: Handle EAGAIN here since it likely can happen. */ |
| skip_server(channel, query, query->server); |
| next_server(channel, query, now); |
| return; |
| } |
| } |
| timeplus = channel->timeout << (query->try_count / channel->nservers); |
| timeplus = (timeplus * (9 + (rand () & 7))) / 16; |
| query->timeout = *now; |
| ares__timeadd(&query->timeout, |
| timeplus); |
| /* Keep track of queries bucketed by timeout, so we can process |
| * timeout events quickly. |
| */ |
| ares__remove_from_list(&(query->queries_by_timeout)); |
| ares__insert_in_list( |
| &(query->queries_by_timeout), |
| &(channel->queries_by_timeout[query->timeout.tv_sec % |
| ARES_TIMEOUT_TABLE_SIZE])); |
| |
| /* Keep track of queries bucketed by server, so we can process server |
| * errors quickly. |
| */ |
| ares__remove_from_list(&(query->queries_to_server)); |
| ares__insert_in_list(&(query->queries_to_server), |
| &(server->queries_to_server)); |
| } |
| |
| /* |
| * setsocknonblock sets the given socket to either blocking or non-blocking |
| * mode based on the 'nonblock' boolean argument. This function is highly |
| * portable. |
| */ |
| static int setsocknonblock(ares_socket_t sockfd, /* operate on this */ |
| int nonblock /* TRUE or FALSE */) |
| { |
| #if defined(USE_BLOCKING_SOCKETS) |
| |
| return 0; /* returns success */ |
| |
| #elif defined(HAVE_FCNTL_O_NONBLOCK) |
| |
| /* most recent unix versions */ |
| int flags; |
| flags = fcntl(sockfd, F_GETFL, 0); |
| if (FALSE != nonblock) |
| return fcntl(sockfd, F_SETFL, flags | O_NONBLOCK); |
| else |
| return fcntl(sockfd, F_SETFL, flags & (~O_NONBLOCK)); |
| |
| #elif defined(HAVE_IOCTL_FIONBIO) |
| |
| /* older unix versions */ |
| int flags; |
| flags = nonblock; |
| return ioctl(sockfd, FIONBIO, &flags); |
| |
| #elif defined(HAVE_IOCTLSOCKET_FIONBIO) |
| |
| #ifdef WATT32 |
| char flags; |
| #else |
| /* Windows */ |
| unsigned long flags; |
| #endif |
| flags = nonblock; |
| return ioctlsocket(sockfd, FIONBIO, &flags); |
| |
| #elif defined(HAVE_IOCTLSOCKET_CAMEL_FIONBIO) |
| |
| /* Amiga */ |
| return IoctlSocket(sockfd, FIONBIO, (long)nonblock); |
| |
| #elif defined(HAVE_SETSOCKOPT_SO_NONBLOCK) |
| |
| /* BeOS */ |
| long b = nonblock ? 1 : 0; |
| return setsockopt(sockfd, SOL_SOCKET, SO_NONBLOCK, &b, sizeof(b)); |
| |
| #else |
| # error "no non-blocking method was found/used/set" |
| #endif |
| } |
| |
| static int configure_socket(ares_socket_t s, int family, ares_channel channel) |
| { |
| union { |
| struct sockaddr sa; |
| struct sockaddr_in sa4; |
| struct sockaddr_in6 sa6; |
| } local; |
| |
| setsocknonblock(s, TRUE); |
| |
| #if defined(FD_CLOEXEC) && !defined(MSDOS) |
| /* Configure the socket fd as close-on-exec. */ |
| if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) |
| return -1; |
| #endif |
| |
| /* Set the socket's send and receive buffer sizes. */ |
| if ((channel->socket_send_buffer_size > 0) && |
| setsockopt(s, SOL_SOCKET, SO_SNDBUF, |
| (void *)&channel->socket_send_buffer_size, |
| sizeof(channel->socket_send_buffer_size)) == -1) |
| return -1; |
| |
| if ((channel->socket_receive_buffer_size > 0) && |
| setsockopt(s, SOL_SOCKET, SO_RCVBUF, |
| (void *)&channel->socket_receive_buffer_size, |
| sizeof(channel->socket_receive_buffer_size)) == -1) |
| return -1; |
| |
| #ifdef SO_BINDTODEVICE |
| if (channel->local_dev_name[0]) { |
| if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, |
| channel->local_dev_name, sizeof(channel->local_dev_name))) { |
| /* Only root can do this, and usually not fatal if it doesn't work, so */ |
| /* just continue on. */ |
| } |
| } |
| #endif |
| |
| if (family == AF_INET) { |
| if (channel->local_ip4) { |
| memset(&local.sa4, 0, sizeof(local.sa4)); |
| local.sa4.sin_family = AF_INET; |
| local.sa4.sin_addr.s_addr = htonl(channel->local_ip4); |
| if (bind(s, &local.sa, sizeof(local.sa4)) < 0) |
| return -1; |
| } |
| } |
| else if (family == AF_INET6) { |
| if (memcmp(channel->local_ip6, &ares_in6addr_any, sizeof(channel->local_ip6)) != 0) { |
| memset(&local.sa6, 0, sizeof(local.sa6)); |
| local.sa6.sin6_family = AF_INET6; |
| memcpy(&local.sa6.sin6_addr, channel->local_ip6, sizeof(channel->local_ip6)); |
| if (bind(s, &local.sa, sizeof(local.sa6)) < 0) |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int open_tcp_socket(ares_channel channel, struct server_state *server) |
| { |
| ares_socket_t s; |
| int opt; |
| ares_socklen_t salen; |
| union { |
| struct sockaddr_in sa4; |
| struct sockaddr_in6 sa6; |
| } saddr; |
| struct sockaddr *sa; |
| |
| switch (server->addr.family) |
| { |
| case AF_INET: |
| sa = (void *)&saddr.sa4; |
| salen = sizeof(saddr.sa4); |
| memset(sa, 0, salen); |
| saddr.sa4.sin_family = AF_INET; |
| saddr.sa4.sin_port = (unsigned short)(channel->tcp_port & 0xffff); |
| memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4, |
| sizeof(server->addr.addrV4)); |
| break; |
| case AF_INET6: |
| sa = (void *)&saddr.sa6; |
| salen = sizeof(saddr.sa6); |
| memset(sa, 0, salen); |
| saddr.sa6.sin6_family = AF_INET6; |
| saddr.sa6.sin6_port = (unsigned short)(channel->tcp_port & 0xffff); |
| memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6, |
| sizeof(server->addr.addrV6)); |
| break; |
| default: |
| return -1; |
| } |
| |
| /* Acquire a socket. */ |
| s = socket(server->addr.family, SOCK_STREAM, 0); |
| if (s == ARES_SOCKET_BAD) |
| return -1; |
| |
| /* Configure it. */ |
| if (configure_socket(s, server->addr.family, channel) < 0) |
| { |
| sclose(s); |
| return -1; |
| } |
| |
| #ifdef TCP_NODELAY |
| /* |
| * Disable the Nagle algorithm (only relevant for TCP sockets, and thus not |
| * in configure_socket). In general, in DNS lookups we're pretty much |
| * interested in firing off a single request and then waiting for a reply, |
| * so batching isn't very interesting. |
| */ |
| opt = 1; |
| if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, |
| (void *)&opt, sizeof(opt)) == -1) |
| { |
| sclose(s); |
| return -1; |
| } |
| #endif |
| |
| /* Connect to the server. */ |
| if (connect(s, sa, salen) == -1) |
| { |
| int err = SOCKERRNO; |
| |
| if (err != EINPROGRESS && err != EWOULDBLOCK) |
| { |
| sclose(s); |
| return -1; |
| } |
| } |
| |
| if (channel->sock_create_cb) |
| { |
| int err = channel->sock_create_cb(s, SOCK_STREAM, |
| channel->sock_create_cb_data); |
| if (err < 0) |
| { |
| sclose(s); |
| return err; |
| } |
| } |
| |
| SOCK_STATE_CALLBACK(channel, s, 1, 0); |
| server->tcp_buffer_pos = 0; |
| server->tcp_socket = s; |
| server->tcp_connection_generation = ++channel->tcp_connection_generation; |
| return 0; |
| } |
| |
| static int open_udp_socket(ares_channel channel, struct server_state *server) |
| { |
| ares_socket_t s; |
| ares_socklen_t salen; |
| union { |
| struct sockaddr_in sa4; |
| struct sockaddr_in6 sa6; |
| } saddr; |
| struct sockaddr *sa; |
| |
| switch (server->addr.family) |
| { |
| case AF_INET: |
| sa = (void *)&saddr.sa4; |
| salen = sizeof(saddr.sa4); |
| memset(sa, 0, salen); |
| saddr.sa4.sin_family = AF_INET; |
| saddr.sa4.sin_port = (unsigned short)(channel->udp_port & 0xffff); |
| memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4, |
| sizeof(server->addr.addrV4)); |
| break; |
| case AF_INET6: |
| sa = (void *)&saddr.sa6; |
| salen = sizeof(saddr.sa6); |
| memset(sa, 0, salen); |
| saddr.sa6.sin6_family = AF_INET6; |
| saddr.sa6.sin6_port = (unsigned short)(channel->udp_port & 0xffff); |
| memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6, |
| sizeof(server->addr.addrV6)); |
| break; |
| default: |
| return -1; |
| } |
| |
| /* Acquire a socket. */ |
| s = socket(server->addr.family, SOCK_DGRAM, 0); |
| if (s == ARES_SOCKET_BAD) |
| return -1; |
| |
| /* Set the socket non-blocking. */ |
| if (configure_socket(s, server->addr.family, channel) < 0) |
| { |
| sclose(s); |
| return -1; |
| } |
| |
| /* Connect to the server. */ |
| if (connect(s, sa, salen) == -1) |
| { |
| int err = SOCKERRNO; |
| |
| if (err != EINPROGRESS && err != EWOULDBLOCK) |
| { |
| sclose(s); |
| return -1; |
| } |
| } |
| |
| if (channel->sock_create_cb) |
| { |
| int err = channel->sock_create_cb(s, SOCK_DGRAM, |
| channel->sock_create_cb_data); |
| if (err < 0) |
| { |
| sclose(s); |
| return err; |
| } |
| } |
| |
| SOCK_STATE_CALLBACK(channel, s, 1, 0); |
| |
| server->udp_socket = s; |
| return 0; |
| } |
| |
| static int same_questions(const unsigned char *qbuf, int qlen, |
| const unsigned char *abuf, int alen) |
| { |
| struct { |
| const unsigned char *p; |
| int qdcount; |
| char *name; |
| long namelen; |
| int type; |
| int dnsclass; |
| } q, a; |
| int i, j; |
| |
| if (qlen < HFIXEDSZ || alen < HFIXEDSZ) |
| return 0; |
| |
| /* Extract qdcount from the request and reply buffers and compare them. */ |
| q.qdcount = DNS_HEADER_QDCOUNT(qbuf); |
| a.qdcount = DNS_HEADER_QDCOUNT(abuf); |
| if (q.qdcount != a.qdcount) |
| return 0; |
| |
| /* For each question in qbuf, find it in abuf. */ |
| q.p = qbuf + HFIXEDSZ; |
| for (i = 0; i < q.qdcount; i++) |
| { |
| /* Decode the question in the query. */ |
| if (ares_expand_name(q.p, qbuf, qlen, &q.name, &q.namelen) |
| != ARES_SUCCESS) |
| return 0; |
| q.p += q.namelen; |
| if (q.p + QFIXEDSZ > qbuf + qlen) |
| { |
| free(q.name); |
| return 0; |
| } |
| q.type = DNS_QUESTION_TYPE(q.p); |
| q.dnsclass = DNS_QUESTION_CLASS(q.p); |
| q.p += QFIXEDSZ; |
| |
| /* Search for this question in the answer. */ |
| a.p = abuf + HFIXEDSZ; |
| for (j = 0; j < a.qdcount; j++) |
| { |
| /* Decode the question in the answer. */ |
| if (ares_expand_name(a.p, abuf, alen, &a.name, &a.namelen) |
| != ARES_SUCCESS) |
| { |
| free(q.name); |
| return 0; |
| } |
| a.p += a.namelen; |
| if (a.p + QFIXEDSZ > abuf + alen) |
| { |
| free(q.name); |
| free(a.name); |
| return 0; |
| } |
| a.type = DNS_QUESTION_TYPE(a.p); |
| a.dnsclass = DNS_QUESTION_CLASS(a.p); |
| a.p += QFIXEDSZ; |
| |
| /* Compare the decoded questions. */ |
| if (strcasecmp(q.name, a.name) == 0 && q.type == a.type |
| && q.dnsclass == a.dnsclass) |
| { |
| free(a.name); |
| break; |
| } |
| free(a.name); |
| } |
| |
| free(q.name); |
| if (j == a.qdcount) |
| return 0; |
| } |
| return 1; |
| } |
| |
| static int same_address(struct sockaddr *sa, struct ares_addr *aa) |
| { |
| void *addr1; |
| void *addr2; |
| |
| if (sa->sa_family == aa->family) |
| { |
| switch (aa->family) |
| { |
| case AF_INET: |
| addr1 = &aa->addrV4; |
| addr2 = &((struct sockaddr_in *)sa)->sin_addr; |
| if (memcmp(addr1, addr2, sizeof(aa->addrV4)) == 0) |
| return 1; /* match */ |
| break; |
| case AF_INET6: |
| addr1 = &aa->addrV6; |
| addr2 = &((struct sockaddr_in6 *)sa)->sin6_addr; |
| if (memcmp(addr1, addr2, sizeof(aa->addrV6)) == 0) |
| return 1; /* match */ |
| break; |
| default: |
| break; |
| } |
| } |
| return 0; /* different */ |
| } |
| |
| static void end_query (ares_channel channel, struct query *query, int status, |
| unsigned char *abuf, int alen) |
| { |
| int i; |
| |
| /* First we check to see if this query ended while one of our send |
| * queues still has pointers to it. |
| */ |
| for (i = 0; i < channel->nservers; i++) |
| { |
| struct server_state *server = &channel->servers[i]; |
| struct send_request *sendreq; |
| for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) |
| if (sendreq->owner_query == query) |
| { |
| sendreq->owner_query = NULL; |
| assert(sendreq->data_storage == NULL); |
| if (status == ARES_SUCCESS) |
| { |
| /* We got a reply for this query, but this queued |
| * sendreq points into this soon-to-be-gone query's |
| * tcpbuf. Probably this means we timed out and queued |
| * the query for retransmission, then received a |
| * response before actually retransmitting. This is |
| * perfectly fine, so we want to keep the connection |
| * running smoothly if we can. But in the worst case |
| * we may have sent only some prefix of the query, |
| * with some suffix of the query left to send. Also, |
| * the buffer may be queued on multiple queues. To |
| * prevent dangling pointers to the query's tcpbuf and |
| * handle these cases, we just give such sendreqs |
| * their own copy of the query packet. |
| */ |
| sendreq->data_storage = malloc(sendreq->len); |
| if (sendreq->data_storage != NULL) |
| { |
| memcpy(sendreq->data_storage, sendreq->data, sendreq->len); |
| sendreq->data = sendreq->data_storage; |
| } |
| } |
| if ((status != ARES_SUCCESS) || (sendreq->data_storage == NULL)) |
| { |
| /* We encountered an error (probably a timeout, |
| * suggesting the DNS server we're talking to is |
| * probably unreachable, wedged, or severely |
| * overloaded) or we couldn't copy the request, so |
| * mark the connection as broken. When we get to |
| * process_broken_connections() we'll close the |
| * connection and try to re-send requests to another |
| * server. |
| */ |
| server->is_broken = 1; |
| /* Just to be paranoid, zero out this sendreq... */ |
| sendreq->data = NULL; |
| sendreq->len = 0; |
| } |
| } |
| } |
| |
| /* Invoke the callback */ |
| query->callback(query->arg, status, query->timeouts, abuf, alen); |
| ares__free_query(query); |
| |
| /* Simple cleanup policy: if no queries are remaining, close all |
| * network sockets unless STAYOPEN is set. |
| */ |
| if (!(channel->flags & ARES_FLAG_STAYOPEN) && |
| ares__is_list_empty(&(channel->all_queries))) |
| { |
| for (i = 0; i < channel->nservers; i++) |
| ares__close_sockets(channel, &channel->servers[i]); |
| } |
| } |
| |
| void ares__free_query(struct query *query) |
| { |
| /* Remove the query from all the lists in which it is linked */ |
| ares__remove_from_list(&(query->queries_by_qid)); |
| ares__remove_from_list(&(query->queries_by_timeout)); |
| ares__remove_from_list(&(query->queries_to_server)); |
| ares__remove_from_list(&(query->all_queries)); |
| /* Zero out some important stuff, to help catch bugs */ |
| query->callback = NULL; |
| query->arg = NULL; |
| /* Deallocate the memory associated with the query */ |
| free(query->tcpbuf); |
| free(query->server_info); |
| free(query); |
| } |