4 * Transmission Control Protocol, incoming traffic
6 * The input processing functions of TCP.
8 * These functions are generally called in the order (ip_input() ->) tcp_input() ->
9 * tcp_process() -> tcp_receive() (-> application).
14 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without modification,
18 * are permitted provided that the following conditions are met:
20 * 1. Redistributions of source code must retain the above copyright notice,
21 * this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright notice,
23 * this list of conditions and the following disclaimer in the documentation
24 * and/or other materials provided with the distribution.
25 * 3. The name of the author may not be used to endorse or promote products
26 * derived from this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
31 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
33 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
39 * This file is part of the lwIP TCP/IP stack.
41 * Author: Adam Dunkels <adam@sics.se>
48 #include "lwip/ip_addr.h"
49 #include "lwip/netif.h"
51 #include "lwip/memp.h"
53 #include "lwip/inet.h"
56 #include "lwip/stats.h"
58 #include "arch/perf.h"
60 /* These variables are global to all functions involved in the input
61 processing of TCP segments. They are set by the tcp_input()
63 static struct tcp_seg inseg;
64 static struct tcp_hdr *tcphdr;
65 static struct ip_hdr *iphdr;
66 static u32_t seqno, ackno;
70 static u8_t recv_flags;
71 static struct pbuf *recv_data;
73 struct tcp_pcb *tcp_input_pcb;
75 /* Forward declarations. */
76 static err_t tcp_process(struct tcp_pcb *pcb);
77 static void tcp_receive(struct tcp_pcb *pcb);
78 static void tcp_parseopt(struct tcp_pcb *pcb);
80 static err_t tcp_listen_input(struct tcp_pcb_listen *pcb);
81 static err_t tcp_timewait_input(struct tcp_pcb *pcb);
86 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
87 * the segment between the PCBs and passes it on to tcp_process(), which implements
88 * the TCP finite state machine. This function is called by the IP layer (in
93 tcp_input(struct pbuf *p, struct netif *inp)
95 struct tcp_pcb *pcb, *prev;
96 struct tcp_pcb_listen *lpcb;
101 struct tcp_pcb *pcb_temp;
104 #endif /* SO_REUSE */
108 TCP_STATS_INC(tcp.recv);
111 tcphdr = (struct tcp_hdr *)((u8_t *)p->payload + IPH_HL(iphdr) * 4);
114 tcp_debug_print(tcphdr);
117 /* remove header from payload */
118 if (pbuf_header(p, -((s16_t)(IPH_HL(iphdr) * 4))) || (p->tot_len < sizeof(struct tcp_hdr))) {
119 /* drop short packets */
120 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%u bytes) discarded\n", p->tot_len));
121 TCP_STATS_INC(tcp.lenerr);
122 TCP_STATS_INC(tcp.drop);
127 /* Don't even process incoming broadcasts/multicasts. */
128 if (ip_addr_isbroadcast(&(iphdr->dest), inp) ||
129 ip_addr_ismulticast(&(iphdr->dest))) {
134 #if CHECKSUM_CHECK_TCP
135 /* Verify TCP checksum. */
136 if (inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src),
137 (struct ip_addr *)&(iphdr->dest),
138 IP_PROTO_TCP, p->tot_len) != 0) {
139 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04x\n",
140 inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src), (struct ip_addr *)&(iphdr->dest),
141 IP_PROTO_TCP, p->tot_len)));
143 tcp_debug_print(tcphdr);
144 #endif /* TCP_DEBUG */
145 TCP_STATS_INC(tcp.chkerr);
146 TCP_STATS_INC(tcp.drop);
153 /* Move the payload pointer in the pbuf so that it points to the
154 TCP data instead of the TCP header. */
155 hdrlen = TCPH_HDRLEN(tcphdr);
156 pbuf_header(p, -(hdrlen * 4));
158 /* Convert fields in TCP header to host byte order. */
159 tcphdr->src = ntohs(tcphdr->src);
160 tcphdr->dest = ntohs(tcphdr->dest);
161 seqno = tcphdr->seqno = ntohl(tcphdr->seqno);
162 ackno = tcphdr->ackno = ntohl(tcphdr->ackno);
163 tcphdr->wnd = ntohs(tcphdr->wnd);
165 flags = TCPH_FLAGS(tcphdr) & TCP_FLAGS;
166 tcplen = p->tot_len + ((flags & TCP_FIN || flags & TCP_SYN)? 1: 0);
168 /* Demultiplex an incoming segment. First, we check if it is destined
169 for an active connection. */
173 pcb_temp = tcp_active_pcbs;
177 /* Iterate through the TCP pcb list for a fully matching pcb */
178 for(pcb = pcb_temp; pcb != NULL; pcb = pcb->next) {
180 for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
181 #endif /* SO_REUSE */
182 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
183 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
184 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);
185 if (pcb->remote_port == tcphdr->src &&
186 pcb->local_port == tcphdr->dest &&
187 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
188 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
191 if(pcb->so_options & SOF_REUSEPORT) {
193 /* We processed one PCB already */
194 LWIP_DEBUGF(TCP_INPUT_DEBUG,("tcp_input: second or later PCB and SOF_REUSEPORT set.\n"));
196 /* First PCB with this address */
197 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: first PCB and SOF_REUSEPORT set.\n"));
204 /* We want to search on next socket after receiving */
205 pcb_temp = pcb->next;
207 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: reference counter on PBUF set to %i\n", p->ref));
210 /* We processed one PCB already */
211 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: second or later PCB but SOF_REUSEPORT not set !\n"));
214 #endif /* SO_REUSE */
216 /* Move this PCB to the front of the list so that subsequent
217 lookups will be faster (we exploit locality in TCP segment
219 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
221 prev->next = pcb->next;
222 pcb->next = tcp_active_pcbs;
223 tcp_active_pcbs = pcb;
225 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
232 /* If it did not go to an active connection, we check the connections
233 in the TIME-WAIT state. */
235 for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
236 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
237 if (pcb->remote_port == tcphdr->src &&
238 pcb->local_port == tcphdr->dest &&
239 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
240 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
241 /* We don't really care enough to move this PCB to the front
242 of the list since we are not very likely to receive that
243 many segments for connections in TIME-WAIT. */
244 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
245 tcp_timewait_input(pcb);
251 /* Finally, if we still did not get a match, we check all PCBs that
252 are LISTENing for incoming connections. */
254 for(lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
255 if ((ip_addr_isany(&(lpcb->local_ip)) ||
256 ip_addr_cmp(&(lpcb->local_ip), &(iphdr->dest))) &&
257 lpcb->local_port == tcphdr->dest) {
258 /* Move this PCB to the front of the list so that subsequent
259 lookups will be faster (we exploit locality in TCP segment
262 ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
263 /* our successor is the remainder of the listening list */
264 lpcb->next = tcp_listen_pcbs.listen_pcbs;
265 /* put this listening pcb at the head of the listening list */
266 tcp_listen_pcbs.listen_pcbs = lpcb;
269 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
270 tcp_listen_input(lpcb);
274 prev = (struct tcp_pcb *)lpcb;
279 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
280 tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
281 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
282 #endif /* TCP_INPUT_DEBUG */
286 /* The incoming segment belongs to a connection. */
289 tcp_debug_print_state(pcb->state);
290 #endif /* TCP_DEBUG */
291 #endif /* TCP_INPUT_DEBUG */
293 /* Set up a tcp_seg structure. */
295 inseg.len = p->tot_len;
296 inseg.dataptr = p->payload;
298 inseg.tcphdr = tcphdr;
304 err = tcp_process(pcb);
305 tcp_input_pcb = NULL;
306 /* A return value of ERR_ABRT means that tcp_abort() was called
307 and that the pcb has been freed. If so, we don't do anything. */
308 if (err != ERR_ABRT) {
309 if (recv_flags & TF_RESET) {
310 /* TF_RESET means that the connection was reset by the other
311 end. We then call the error callback to inform the
312 application that the connection is dead before we
313 deallocate the PCB. */
314 TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ERR_RST);
315 tcp_pcb_remove(&tcp_active_pcbs, pcb);
316 memp_free(MEMP_TCP_PCB, pcb);
317 } else if (recv_flags & TF_CLOSED) {
318 /* The connection has been closed and we will deallocate the
320 tcp_pcb_remove(&tcp_active_pcbs, pcb);
321 memp_free(MEMP_TCP_PCB, pcb);
324 /* If the application has registered a "sent" function to be
325 called when new send buffer space is available, we call it
327 if (pcb->acked > 0) {
328 TCP_EVENT_SENT(pcb, pcb->acked, err);
331 if (recv_data != NULL) {
332 /* Notify application that data has been received. */
333 TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
336 /* If a FIN segment was received, we call the callback
337 function with a NULL buffer to indicate EOF. */
338 if (recv_flags & TF_GOT_FIN) {
339 TCP_EVENT_RECV(pcb, NULL, ERR_OK, err);
341 /* If there were no errors, we try to send something out. */
349 /* We deallocate the incoming pbuf. If it was buffered by the
350 application, the application should have called pbuf_ref() to
351 increase the reference counter in the pbuf. If so, the buffer
352 isn't actually deallocated by the call to pbuf_free(), only the
353 reference count is decreased. */
354 if (inseg.p != NULL) pbuf_free(inseg.p);
357 tcp_debug_print_state(pcb->state);
358 #endif /* TCP_DEBUG */
359 #endif /* TCP_INPUT_DEBUG */
361 /* First socket should receive now */
363 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: searching next PCB.\n"));
366 /* We are searching connected sockets */
369 #endif /* SO_REUSE */
374 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: freeing PBUF with reference counter set to %i\n", p->ref));
378 #endif /* SO_REUSE */
379 /* If no matching PCB was found, send a TCP RST (reset) to the
381 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
382 if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
383 TCP_STATS_INC(tcp.proterr);
384 TCP_STATS_INC(tcp.drop);
385 tcp_rst(ackno, seqno + tcplen,
386 &(iphdr->dest), &(iphdr->src),
387 tcphdr->dest, tcphdr->src);
393 #endif /* SO_REUSE */
394 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
395 PERF_STOP("tcp_input");
398 /* tcp_listen_input():
400 * Called by tcp_input() when a segment arrives for a listening
405 tcp_listen_input(struct tcp_pcb_listen *pcb)
407 struct tcp_pcb *npcb;
410 /* In the LISTEN state, we check for incoming SYN segments,
411 create a new PCB, and respond with a SYN|ACK. */
412 if (flags & TCP_ACK) {
413 /* For incoming segments with the ACK flag set, respond with a
415 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
416 tcp_rst(ackno + 1, seqno + tcplen,
417 &(iphdr->dest), &(iphdr->src),
418 tcphdr->dest, tcphdr->src);
419 } else if (flags & TCP_SYN) {
420 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %u -> %u.\n", tcphdr->src, tcphdr->dest));
421 npcb = tcp_alloc(pcb->prio);
422 /* If a new PCB could not be created (probably due to lack of memory),
423 we don't do anything, but rely on the sender to retransmit the
424 SYN at a time when we have more memory available. */
426 LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
427 TCP_STATS_INC(tcp.memerr);
430 /* Set up the new PCB. */
431 ip_addr_set(&(npcb->local_ip), &(iphdr->dest));
432 npcb->local_port = pcb->local_port;
433 ip_addr_set(&(npcb->remote_ip), &(iphdr->src));
434 npcb->remote_port = tcphdr->src;
435 npcb->state = SYN_RCVD;
436 npcb->rcv_nxt = seqno + 1;
437 npcb->snd_wnd = tcphdr->wnd;
438 npcb->ssthresh = npcb->snd_wnd;
439 npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */
440 npcb->callback_arg = pcb->callback_arg;
441 #if LWIP_CALLBACK_API
442 npcb->accept = pcb->accept;
443 #endif /* LWIP_CALLBACK_API */
444 /* inherit socket options */
445 npcb->so_options = pcb->so_options & (SOF_DEBUG|SOF_DONTROUTE|SOF_KEEPALIVE|SOF_OOBINLINE|SOF_LINGER);
446 /* Register the new PCB so that we can begin receiving segments
448 TCP_REG(&tcp_active_pcbs, npcb);
450 /* Parse any options in the SYN. */
453 /* Build an MSS option. */
454 optdata = htonl(((u32_t)2 << 24) |
456 (((u32_t)npcb->mss / 256) << 8) |
458 /* Send a SYN|ACK together with the MSS option. */
459 tcp_enqueue(npcb, NULL, 0, TCP_SYN | TCP_ACK, 0, (u8_t *)&optdata, 4);
460 return tcp_output(npcb);
465 /* tcp_timewait_input():
467 * Called by tcp_input() when a segment arrives for a connection in
472 tcp_timewait_input(struct tcp_pcb *pcb)
474 if (TCP_SEQ_GT(seqno + tcplen, pcb->rcv_nxt)) {
475 pcb->rcv_nxt = seqno + tcplen;
480 return tcp_output(pcb);
485 * Implements the TCP state machine. Called by tcp_input. In some
486 * states tcp_receive() is called to receive data. The tcp_seg
487 * argument will be freed by the caller (tcp_input()) unless the
488 * recv_data pointer in the pcb is set.
492 tcp_process(struct tcp_pcb *pcb)
494 struct tcp_seg *rseg;
501 /* Process incoming RST segments. */
502 if (flags & TCP_RST) {
503 /* First, determine if the reset is acceptable. */
504 if (pcb->state == SYN_SENT) {
505 if (ackno == pcb->snd_nxt) {
509 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
510 TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
512 if(TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt+pcb->rcv_wnd)){
518 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
519 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
520 recv_flags = TF_RESET;
521 pcb->flags &= ~TF_ACK_DELAY;
524 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %lu rcv_nxt %lu\n",
525 seqno, pcb->rcv_nxt));
526 LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %lu rcv_nxt %lu\n",
527 seqno, pcb->rcv_nxt));
532 /* Update the PCB (in)activity timer. */
533 pcb->tmr = tcp_ticks;
536 /* Do different things depending on the TCP state. */
537 switch (pcb->state) {
539 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %lu pcb->snd_nxt %lu unacked %lu\n", ackno,
540 pcb->snd_nxt, ntohl(pcb->unacked->tcphdr->seqno)));
541 if ((flags & TCP_ACK) && (flags & TCP_SYN)
542 && ackno == ntohl(pcb->unacked->tcphdr->seqno) + 1) {
544 pcb->rcv_nxt = seqno + 1;
545 pcb->lastack = ackno;
546 pcb->snd_wnd = tcphdr->wnd;
547 pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
548 pcb->state = ESTABLISHED;
549 pcb->cwnd = pcb->mss;
551 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %u\n", (unsigned int)pcb->snd_queuelen));
553 pcb->unacked = rseg->next;
556 /* Parse any options in the SYNACK. */
559 /* Call the user specified function to call when successfully
561 TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
566 if (flags & TCP_ACK &&
567 !(flags & TCP_RST)) {
568 /*if (TCP_SEQ_LT(pcb->lastack, ackno) &&
569 TCP_SEQ_LEQ(ackno, pcb->snd_nxt)) { */
570 if(TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)){
571 pcb->state = ESTABLISHED;
572 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
573 #if LWIP_CALLBACK_API
574 LWIP_ASSERT("pcb->accept != NULL", pcb->accept != NULL);
576 /* Call the accept function. */
577 TCP_EVENT_ACCEPT(pcb, ERR_OK, err);
579 /* If the accept function returns with an error, we abort
584 /* If there was any data contained within this ACK,
585 * we'd better pass it on to the application as well. */
587 pcb->cwnd = pcb->mss;
595 if (flags & TCP_FIN) {
597 pcb->state = CLOSE_WAIT;
602 if (flags & TCP_FIN) {
603 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
604 LWIP_DEBUGF(TCP_DEBUG,
605 ("TCP connection closed %d -> %d.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
608 TCP_RMV(&tcp_active_pcbs, pcb);
609 pcb->state = TIME_WAIT;
610 TCP_REG(&tcp_tw_pcbs, pcb);
613 pcb->state = CLOSING;
615 } else if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
616 pcb->state = FIN_WAIT_2;
621 if (flags & TCP_FIN) {
622 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
625 TCP_RMV(&tcp_active_pcbs, pcb);
626 pcb->state = TIME_WAIT;
627 TCP_REG(&tcp_tw_pcbs, pcb);
632 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
633 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
636 TCP_RMV(&tcp_active_pcbs, pcb);
637 pcb->state = TIME_WAIT;
638 TCP_REG(&tcp_tw_pcbs, pcb);
643 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
644 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
646 recv_flags = TF_CLOSED;
658 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
659 * data, and if so frees the memory of the buffered data. Next, it places the
660 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
661 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
662 * it has been removed from the buffer.
664 * If the incoming segment constitutes an ACK for a segment that was used for RTT
665 * estimation, the RTT is estimated here as well.
669 tcp_receive(struct tcp_pcb *pcb)
671 struct tcp_seg *next;
673 struct tcp_seg *prev, *cseg;
678 u32_t right_wnd_edge;
682 if (flags & TCP_ACK) {
683 right_wnd_edge = pcb->snd_wnd + pcb->snd_wl1;
686 if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
687 (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
688 (pcb->snd_wl2 == ackno && tcphdr->wnd > pcb->snd_wnd)) {
689 pcb->snd_wnd = tcphdr->wnd;
690 pcb->snd_wl1 = seqno;
691 pcb->snd_wl2 = ackno;
692 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %lu\n", pcb->snd_wnd));
695 if (pcb->snd_wnd != tcphdr->wnd) {
696 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: no window update lastack %lu snd_max %lu ackno %lu wl1 %lu seqno %lu wl2 %lu\n",
697 pcb->lastack, pcb->snd_max, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
699 #endif /* TCP_WND_DEBUG */
703 if (pcb->lastack == ackno) {
706 if (pcb->snd_wl1 + pcb->snd_wnd == right_wnd_edge){
708 if (pcb->dupacks >= 3 && pcb->unacked != NULL) {
709 if (!(pcb->flags & TF_INFR)) {
710 /* This is fast retransmit. Retransmit the first unacked segment. */
711 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupacks %u (%lu), fast retransmit %lu\n",
712 (unsigned int)pcb->dupacks, pcb->lastack,
713 ntohl(pcb->unacked->tcphdr->seqno)));
715 /* Set ssthresh to max (FlightSize / 2, 2*SMSS) */
716 /*pcb->ssthresh = LWIP_MAX((pcb->snd_max -
719 /* Set ssthresh to half of the minimum of the current cwnd and the advertised window */
720 if(pcb->cwnd > pcb->snd_wnd)
721 pcb->ssthresh = pcb->snd_wnd / 2;
723 pcb->ssthresh = pcb->cwnd / 2;
725 pcb->cwnd = pcb->ssthresh + 3 * pcb->mss;
726 pcb->flags |= TF_INFR;
728 /* Inflate the congestion window, but not if it means that
729 the value overflows. */
730 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
731 pcb->cwnd += pcb->mss;
736 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupack averted %lu %lu\n",
737 pcb->snd_wl1 + pcb->snd_wnd, right_wnd_edge));
740 /*if (TCP_SEQ_LT(pcb->lastack, ackno) &&
741 TCP_SEQ_LEQ(ackno, pcb->snd_max)) { */
742 if(TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_max)){
743 /* We come here when the ACK acknowledges new data. */
745 /* Reset the "IN Fast Retransmit" flag, since we are no longer
746 in fast retransmit. Also reset the congestion window to the
747 slow start threshold. */
748 if (pcb->flags & TF_INFR) {
749 pcb->flags &= ~TF_INFR;
750 pcb->cwnd = pcb->ssthresh;
753 /* Reset the number of retransmissions. */
756 /* Reset the retransmission time-out. */
757 pcb->rto = (pcb->sa >> 3) + pcb->sv;
759 /* Update the send buffer space. */
760 pcb->acked = ackno - pcb->lastack;
761 pcb->snd_buf += pcb->acked;
763 /* Reset the fast retransmit variables. */
765 pcb->lastack = ackno;
767 /* Update the congestion control variables (cwnd and
769 if (pcb->state >= ESTABLISHED) {
770 if (pcb->cwnd < pcb->ssthresh) {
771 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
772 pcb->cwnd += pcb->mss;
774 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %u\n", pcb->cwnd));
776 u16_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
777 if (new_cwnd > pcb->cwnd) {
778 pcb->cwnd = new_cwnd;
780 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %u\n", pcb->cwnd));
783 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %lu, unacked->seqno %lu:%lu\n",
785 pcb->unacked != NULL?
786 ntohl(pcb->unacked->tcphdr->seqno): 0,
787 pcb->unacked != NULL?
788 ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked): 0));
790 /* Remove segments from the unacknowledged list if the incoming
791 ACK acknowledges them. */
792 while (pcb->unacked != NULL &&
793 TCP_SEQ_LEQ(ntohl(pcb->unacked->tcphdr->seqno) +
794 TCP_TCPLEN(pcb->unacked), ackno)) {
795 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %lu:%lu from pcb->unacked\n",
796 ntohl(pcb->unacked->tcphdr->seqno),
797 ntohl(pcb->unacked->tcphdr->seqno) +
798 TCP_TCPLEN(pcb->unacked)));
801 pcb->unacked = pcb->unacked->next;
803 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %u ... ", (unsigned int)pcb->snd_queuelen));
804 pcb->snd_queuelen -= pbuf_clen(next->p);
807 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%u (after freeing unacked)\n", (unsigned int)pcb->snd_queuelen));
808 if (pcb->snd_queuelen != 0) {
809 LWIP_ASSERT("tcp_receive: valid queue length", pcb->unacked != NULL ||
810 pcb->unsent != NULL);
816 /* We go through the ->unsent list to see if any of the segments
817 on the list are acknowledged by the ACK. This may seem
818 strange since an "unsent" segment shouldn't be acked. The
819 rationale is that lwIP puts all outstanding segments on the
820 ->unsent list after a retransmission, so these segments may
821 in fact have been sent once. */
822 while (pcb->unsent != NULL &&
823 /*TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), ackno) &&
824 TCP_SEQ_LEQ(ackno, pcb->snd_max)*/
825 TCP_SEQ_BETWEEN(ackno, ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), pcb->snd_max)
827 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %lu:%lu from pcb->unsent\n",
828 ntohl(pcb->unsent->tcphdr->seqno), ntohl(pcb->unsent->tcphdr->seqno) +
829 TCP_TCPLEN(pcb->unsent)));
832 pcb->unsent = pcb->unsent->next;
833 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %u ... ", (unsigned int)pcb->snd_queuelen));
834 pcb->snd_queuelen -= pbuf_clen(next->p);
836 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%u (after freeing unsent)\n", (unsigned int)pcb->snd_queuelen));
837 if (pcb->snd_queuelen != 0) {
838 LWIP_ASSERT("tcp_receive: valid queue length",
839 pcb->unacked != NULL || pcb->unsent != NULL);
842 if (pcb->unsent != NULL) {
843 pcb->snd_nxt = htonl(pcb->unsent->tcphdr->seqno);
846 /* End of ACK for new data processing. */
848 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %u rtseq %lu ackno %lu\n",
849 pcb->rttest, pcb->rtseq, ackno));
851 /* RTT estimation calculations. This is done by checking if the
852 incoming segment acknowledges the segment we use to take a
853 round-trip time measurement. */
854 if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
855 m = tcp_ticks - pcb->rttest;
857 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: experienced rtt %u ticks (%u msec).\n",
858 m, m * TCP_SLOW_INTERVAL));
860 /* This is taken directly from VJs original code in his paper */
861 m = m - (pcb->sa >> 3);
866 m = m - (pcb->sv >> 2);
868 pcb->rto = (pcb->sa >> 3) + pcb->sv;
870 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %u (%u miliseconds)\n",
871 pcb->rto, pcb->rto * TCP_SLOW_INTERVAL));
877 /* If the incoming segment contains data, we must process it
880 /* This code basically does three things:
882 +) If the incoming segment contains data that is the next
883 in-sequence data, this data is passed to the application. This
884 might involve trimming the first edge of the data. The rcv_nxt
885 variable and the advertised window are adjusted.
887 +) If the incoming segment has data that is above the next
888 sequence number expected (->rcv_nxt), the segment is placed on
889 the ->ooseq queue. This is done by finding the appropriate
890 place in the ->ooseq queue (which is ordered by sequence
891 number) and trim the segment in both ends if needed. An
892 immediate ACK is sent to indicate that we received an
893 out-of-sequence segment.
895 +) Finally, we check if the first segment on the ->ooseq queue
896 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
897 rcv_nxt > ooseq->seqno, we must trim the first edge of the
898 segment on ->ooseq before we adjust rcv_nxt. The data in the
899 segments that are now in sequence are chained onto the
900 incoming segment so that we only need to call the application
904 /* First, we check if we must trim the first edge. We have to do
905 this if the sequence number of the incoming segment is less
906 than rcv_nxt, and the sequence number plus the length of the
907 segment is larger than rcv_nxt. */
908 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
909 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
910 if(TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno+1, seqno+tcplen-1)){
911 /* Trimming the first edge is done by pushing the payload
912 pointer in the pbuf downwards. This is somewhat tricky since
913 we do not want to discard the full contents of the pbuf up to
914 the new starting point of the data since we have to keep the
915 TCP header which is present in the first pbuf in the chain.
917 What is done is really quite a nasty hack: the first pbuf in
918 the pbuf chain is pointed to by inseg.p. Since we need to be
919 able to deallocate the whole pbuf, we cannot change this
920 inseg.p pointer to point to any of the later pbufs in the
921 chain. Instead, we point the ->payload pointer in the first
922 pbuf to data in one of the later pbufs. We also set the
923 inseg.data pointer to point to the right place. This way, the
924 ->p pointer will still point to the first pbuf, but the
925 ->p->payload pointer will point to data in another pbuf.
927 After we are done with adjusting the pbuf pointers we must
928 adjust the ->data pointer in the seg and the segment
931 off = pcb->rcv_nxt - seqno;
933 if (inseg.p->len < off) {
934 new_tot_len = inseg.p->tot_len - off;
935 while (p->len < off) {
937 /* KJM following line changed (with addition of new_tot_len var)
939 inseg.p->tot_len -= p->len; */
940 p->tot_len = new_tot_len;
944 pbuf_header(p, -off);
946 pbuf_header(inseg.p, -off);
948 /* KJM following line changed to use p->payload rather than inseg->p->payload
950 inseg.dataptr = p->payload;
951 inseg.len -= pcb->rcv_nxt - seqno;
952 inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
955 if(TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
956 /* the whole segment is < rcv_nxt */
957 /* must be a duplicate of a packet that has already been correctly handled */
959 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %lu\n", seqno));
964 /* The sequence number must be within the window (above rcv_nxt
965 and below rcv_nxt + rcv_wnd) in order to be further
967 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
968 TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
969 if(TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)){
970 if (pcb->rcv_nxt == seqno) {
971 /* The incoming segment is the next in sequence. We check if
972 we have to trim the end of the segment and update rcv_nxt
973 and pass the data to the application. */
975 if (pcb->ooseq != NULL &&
976 TCP_SEQ_LEQ(pcb->ooseq->tcphdr->seqno, seqno + inseg.len)) {
977 /* We have to trim the second edge of the incoming
979 inseg.len = pcb->ooseq->tcphdr->seqno - seqno;
980 pbuf_realloc(inseg.p, inseg.len);
982 #endif /* TCP_QUEUE_OOSEQ */
984 tcplen = TCP_TCPLEN(&inseg);
986 pcb->rcv_nxt += tcplen;
988 /* Update the receiver's (our) window. */
989 if (pcb->rcv_wnd < tcplen) {
992 pcb->rcv_wnd -= tcplen;
995 /* If there is data in the segment, we make preparations to
996 pass this up to the application. The ->recv_data variable
997 is used for holding the pbuf that goes to the
998 application. The code for reassembling out-of-sequence data
999 chains its data on this pbuf as well.
1001 If the segment was a FIN, we set the TF_GOT_FIN flag that will
1002 be used to indicate to the application that the remote side has
1003 closed its end of the connection. */
1004 if (inseg.p->tot_len > 0) {
1005 recv_data = inseg.p;
1006 /* Since this pbuf now is the responsibility of the
1007 application, we delete our reference to it so that we won't
1008 (mistakenly) deallocate it. */
1011 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1012 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
1013 recv_flags = TF_GOT_FIN;
1017 /* We now check if we have segments on the ->ooseq queue that
1018 are now in sequence. */
1019 while (pcb->ooseq != NULL &&
1020 pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
1023 seqno = pcb->ooseq->tcphdr->seqno;
1025 pcb->rcv_nxt += TCP_TCPLEN(cseg);
1026 if (pcb->rcv_wnd < TCP_TCPLEN(cseg)) {
1029 pcb->rcv_wnd -= TCP_TCPLEN(cseg);
1031 if (cseg->p->tot_len > 0) {
1032 /* Chain this pbuf onto the pbuf that we will pass to
1035 pbuf_cat(recv_data, cseg->p);
1037 recv_data = cseg->p;
1041 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
1042 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
1043 recv_flags = TF_GOT_FIN;
1047 pcb->ooseq = cseg->next;
1050 #endif /* TCP_QUEUE_OOSEQ */
1053 /* Acknowledge the segment(s). */
1057 /* We get here if the incoming segment is out-of-sequence. */
1060 /* We queue the segment on the ->ooseq queue. */
1061 if (pcb->ooseq == NULL) {
1062 pcb->ooseq = tcp_seg_copy(&inseg);
1064 /* If the queue is not empty, we walk through the queue and
1065 try to find a place where the sequence number of the
1066 incoming segment is between the sequence numbers of the
1067 previous and the next segment on the ->ooseq queue. That is
1068 the place where we put the incoming segment. If needed, we
1069 trim the second edges of the previous and the incoming
1070 segment so that it will fit into the sequence.
1072 If the incoming segment has the same sequence number as a
1073 segment on the ->ooseq queue, we discard the segment that
1074 contains less data. */
1077 for(next = pcb->ooseq; next != NULL; next = next->next) {
1078 if (seqno == next->tcphdr->seqno) {
1079 /* The sequence number of the incoming segment is the
1080 same as the sequence number of the segment on
1081 ->ooseq. We check the lengths to see which one to
1083 if (inseg.len > next->len) {
1084 /* The incoming segment is larger than the old
1085 segment. We replace the old segment with the new
1087 cseg = tcp_seg_copy(&inseg);
1089 cseg->next = next->next;
1098 /* Either the lengths are the same or the incoming
1099 segment was smaller than the old one; in either
1100 case, we ditch the incoming segment. */
1105 if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1106 /* The sequence number of the incoming segment is lower
1107 than the sequence number of the first segment on the
1108 queue. We put the incoming segment first on the
1111 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1112 /* We need to trim the incoming segment. */
1113 inseg.len = next->tcphdr->seqno - seqno;
1114 pbuf_realloc(inseg.p, inseg.len);
1116 cseg = tcp_seg_copy(&inseg);
1124 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1125 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1126 if(TCP_SEQ_BETWEEN(seqno, prev->tcphdr->seqno+1, next->tcphdr->seqno-1)){
1127 /* The sequence number of the incoming segment is in
1128 between the sequence numbers of the previous and
1129 the next segment on ->ooseq. We trim and insert the
1130 incoming segment and trim the previous segment, if
1132 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1133 /* We need to trim the incoming segment. */
1134 inseg.len = next->tcphdr->seqno - seqno;
1135 pbuf_realloc(inseg.p, inseg.len);
1138 cseg = tcp_seg_copy(&inseg);
1142 if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
1143 /* We need to trim the prev segment. */
1144 prev->len = seqno - prev->tcphdr->seqno;
1145 pbuf_realloc(prev->p, prev->len);
1150 /* If the "next" segment is the last segment on the
1151 ooseq queue, we add the incoming segment to the end
1153 if (next->next == NULL &&
1154 TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
1155 next->next = tcp_seg_copy(&inseg);
1156 if (next->next != NULL) {
1157 if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
1158 /* We need to trim the last segment. */
1159 next->len = seqno - next->tcphdr->seqno;
1160 pbuf_realloc(next->p, next->len);
1169 #endif /* TCP_QUEUE_OOSEQ */
1173 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1174 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1175 if(!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd-1)){
1180 /* Segments with length 0 are taken care of here. Segments that
1181 fall out of the window are ACKed. */
1182 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1183 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1184 if(!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd-1)){
1193 * Parses the options contained in the incoming segment. (Code taken
1194 * from uIP with only small changes.)
1199 tcp_parseopt(struct tcp_pcb *pcb)
1205 opts = (u8_t *)tcphdr + TCP_HLEN;
1207 /* Parse the TCP MSS option, if present. */
1208 if(TCPH_HDRLEN(tcphdr) > 0x5) {
1209 for(c = 0; c < (TCPH_HDRLEN(tcphdr) - 5) << 2 ;) {
1212 /* End of options. */
1214 } else if (opt == 0x01) {
1217 } else if (opt == 0x02 &&
1218 opts[c + 1] == 0x04) {
1219 /* An MSS option with the right option length. */
1220 mss = (opts[c + 2] << 8) | opts[c + 3];
1221 pcb->mss = mss > TCP_MSS? TCP_MSS: mss;
1223 /* And we are done processing options. */
1226 if (opts[c + 1] == 0) {
1227 /* If the length field is zero, the options are malformed
1228 and we don't process them further. */
1231 /* All other options have a length field, so that we easily
1232 can skip past them. */
1238 #endif /* LWIP_TCP */