From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-bk0-f43.google.com (mail-bk0-f43.google.com [209.85.214.43]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (Client CN "smtp.gmail.com", Issuer "Google Internet Authority" (verified OK)) by huchra.bufferbloat.net (Postfix) with ESMTPS id 4BC6C21F0FB; Mon, 14 May 2012 11:43:02 -0700 (PDT) Received: by bkty5 with SMTP id y5so9273088bkt.16 for ; Mon, 14 May 2012 11:43:00 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=subject:from:to:cc:in-reply-to:references:content-type:date :message-id:mime-version:x-mailer:content-transfer-encoding; bh=kfq0UM/9CLTTmhWvGMu2qPxQXt9Rk1HSAcn0sJRYs6U=; b=wnIIkXAilDkBXuetjJB76q4pf9qy6gDdSHH+xCvmP91LWCESxFJT+yGpku2JQIBWmH RWxyHFKhy3Azm6n883M7jer71C93O/WREN/po2PW2IVw/BHSOvLeDns+7IYQYkmwtTOG k29aQCwLsZKBvlqNVLcu7eMemvzd+4Y3WT+p4egurJxvsA3gaObmoTcz/CbF2M4ij8GF AGn1pi9MochYEdluTKltPlKGCHZLQ47G5n6v/vdoyBjSNnZU6n3y+iE5qI7WL98MC6MK TFYrtMmw9o/ij6d6+AcQYjhJZ5/skV5w3yhvIotMQAVe1JMZHWNkqF/yoAqvPIkeSTDG Ez4w== Received: by 10.205.132.13 with SMTP id hs13mr3432639bkc.78.1337020980134; Mon, 14 May 2012 11:43:00 -0700 (PDT) Received: from [192.168.178.86] ([74.125.122.49]) by mx.google.com with ESMTPS id 9sm36626777bku.9.2012.05.14.11.42.57 (version=SSLv3 cipher=OTHER); Mon, 14 May 2012 11:42:58 -0700 (PDT) From: Eric Dumazet To: Juliusz Chroboczek In-Reply-To: <1337020307.8512.599.camel@edumazet-glaptop> References: <4FB129EC.90200@gmail.com> <7imx5a3b4t.fsf@lanthane.pps.jussieu.fr> <1337018621.8512.592.camel@edumazet-glaptop> <7id366vcwb.fsf@lanthane.pps.jussieu.fr> <1337020307.8512.599.camel@edumazet-glaptop> Content-Type: text/plain; charset="UTF-8" Date: Mon, 14 May 2012 20:42:55 +0200 Message-ID: <1337020975.8512.602.camel@edumazet-glaptop> Mime-Version: 1.0 X-Mailer: Evolution 2.28.3 Content-Transfer-Encoding: 7bit Cc: codel@lists.bufferbloat.net, bloat@lists.bufferbloat.net Subject: Re: [Codel] [Bloat] better tc support for bittorrent/diffserv X-BeenThere: codel@lists.bufferbloat.net X-Mailman-Version: 2.1.13 Precedence: list List-Id: CoDel AQM discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 14 May 2012 18:43:03 -0000 On Mon, 2012-05-14 at 20:31 +0200, Eric Dumazet wrote: > On Mon, 2012-05-14 at 20:24 +0200, Juliusz Chroboczek wrote: > > > There is an API to set ECN on UDP packets. > > > > > > setsockopt() IP_TOS, with val = 2 > > > > Interesting, I didn't realise that. Is it also possible to read the ECN > > bits? > > > Yep > > man 7 ip > > IP_RECVTOS (since Linux 2.2) > If enabled the IP_TOS ancillary message is passed with incoming packets. > It contains a byte which specifies the Type of Service/Precedence field > of the packet header. Expects a boolean integer flag. > > Here is a quick and dirty UDP echo server (reflecting the TOS and correct src address on multi homed host ) #include #include #include #include #include #include #include #include #define PORT 4040 int pktinfo_get(struct msghdr *my_hdr, struct in_pktinfo *pktinfo) { int res = -1; if (my_hdr->msg_controllen > 0) { struct cmsghdr *get_cmsg; for (get_cmsg = CMSG_FIRSTHDR(my_hdr); get_cmsg; get_cmsg = CMSG_NXTHDR(my_hdr, get_cmsg)) { if (get_cmsg->cmsg_type == IP_PKTINFO) { struct in_pktinfo *get_pktinfo = (struct in_pktinfo *)CMSG_DATA(get_cmsg); memcpy(pktinfo, get_pktinfo, sizeof(*pktinfo)); res = 0; } } } return res; } int tos_get(struct msghdr *my_hdr, unsigned char *tos) { int res = -1; if (my_hdr->msg_controllen > 0) { struct cmsghdr *get_cmsg; for (get_cmsg = CMSG_FIRSTHDR(my_hdr); get_cmsg; get_cmsg = CMSG_NXTHDR(my_hdr, get_cmsg)) { if (get_cmsg->cmsg_type == IP_TOS) { unsigned char *pkttos = (unsigned char *)CMSG_DATA(get_cmsg); *tos = *pkttos; res = 0; } } } return res; } int main(int argc, char *argv[]) { int fd = socket(AF_INET, SOCK_DGRAM, 0); struct sockaddr_in addr, rem_addr; int res, on = 1; struct msghdr msghdr; struct iovec vec[1]; char cbuf[512]; char frame[4096]; struct in_pktinfo pktinfo; int c, count = 1000000; unsigned char last_tos = 0; while ((c = getopt(argc, argv, "c:")) != -1) { if (c == 'c') count = atoi(optarg); } memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(PORT); if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { perror("bind"); return 1; } setsockopt(fd, SOL_IP, IP_PKTINFO, &on, sizeof(on)); setsockopt(fd, SOL_IP, IP_RECVTOS, &on, sizeof(on)); while (1) { unsigned char tos; memset(&msghdr, 0, sizeof(msghdr)); msghdr.msg_control = cbuf; msghdr.msg_controllen = sizeof(cbuf); msghdr.msg_iov = vec; msghdr.msg_iovlen = 1; vec[0].iov_base = frame; vec[0].iov_len = sizeof(frame); msghdr.msg_name = &rem_addr; msghdr.msg_namelen = sizeof(rem_addr); res = recvmsg(fd, &msghdr, 0); if (res == -1) break; if (pktinfo_get(&msghdr, &pktinfo) == 0) { // printf("Got IP_PKTINFO dst addr=%s\n", inet_ntoa(pktinfo.ipi_spec_dst)); } if (tos_get(&msghdr, &tos) == 0) { /* IP_TOS option wont be used in sendmsg(), we must use setsockopt() instead */ if (tos != last_tos) { if (setsockopt(fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) == 0) last_tos = tos; } } /* ok, just echo reply this frame. * Using sendmsg() will provide IP_PKTINFO back to kernel * to let it use the 'right' source address * (destination address of the incoming packet) */ vec[0].iov_len = res; sendmsg(fd, &msghdr, 0); if (--count == 0) break; } return 0; }