4 * Transmission Control Protocol, incoming traffic
6 * The input processing functions of the TCP layer.
8 * These functions are generally called in the order (ip_input() ->)
9 * tcp_input() -> tcp_process() -> tcp_receive() (-> application).
14 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without modification,
18 * are permitted provided that the following conditions are met:
20 * 1. Redistributions of source code must retain the above copyright notice,
21 * this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright notice,
23 * this list of conditions and the following disclaimer in the documentation
24 * and/or other materials provided with the distribution.
25 * 3. The name of the author may not be used to endorse or promote products
26 * derived from this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
31 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
33 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
39 * This file is part of the lwIP TCP/IP stack.
41 * Author: Adam Dunkels <adam@sics.se>
48 #include "lwip/ip_addr.h"
49 #include "lwip/netif.h"
51 #include "lwip/memp.h"
53 #include "lwip/inet.h"
56 #include "lwip/stats.h"
57 #include "arch/perf.h"
58 #include "lwip/snmp.h"
/* File-scope state shared by the segment-input path, followed by the
 * prototypes of the static helpers defined later in this file.
 *
 *   inseg          descriptor of the segment being processed; tcp_input()
 *                  fills in its len, dataptr and tcphdr fields.
 *   tcphdr, iphdr  headers of the current segment. tcp_input() rewrites the
 *                  TCP header fields to host byte order in place.
 *   seqno, ackno   host-order sequence and acknowledgment numbers copied
 *                  from tcphdr by tcp_input().
 *   recv_flags     TF_RESET, TF_CLOSED or TF_GOT_FIN, set by tcp_process()
 *                  and tcp_receive() and examined by tcp_input().
 *   recv_data      pbuf that tcp_input() delivers through TCP_EVENT_RECV
 *                  when it is not NULL.
 *   tcp_input_pcb  the only non-static variable here; tcp_input() resets it
 *                  to NULL once tcp_process() has returned.
 *
 * NOTE(review): `flags` and `tcplen` are used throughout the file, but their
 * declarations are not part of this listing.
 */
61 /* These variables are global to all functions involved in the input
62 processing of TCP segments. They are set by the tcp_input()
64 static struct tcp_seg inseg;
65 static struct tcp_hdr *tcphdr;
66 static struct ip_hdr *iphdr;
67 static u32_t seqno, ackno;
71 static u8_t recv_flags;
72 static struct pbuf *recv_data;
74 struct tcp_pcb *tcp_input_pcb;
76 /* Forward declarations. */
77 static err_t tcp_process(struct tcp_pcb *pcb);
78 static u8_t tcp_receive(struct tcp_pcb *pcb);
79 static void tcp_parseopt(struct tcp_pcb *pcb);
81 static err_t tcp_listen_input(struct tcp_pcb_listen *pcb);
82 static err_t tcp_timewait_input(struct tcp_pcb *pcb);
/* tcp_input(p, inp)
 *
 * Entry point for a received TCP segment. Going by the statements visible
 * below, the function:
 *   - counts the segment (TCP_STATS and SNMP), locates the TCP header behind
 *     the IP header (IPH_HL(iphdr) * 4 bytes) and strips the IP header from p;
 *   - drops segments shorter than a TCP header, segments addressed to a
 *     broadcast or multicast destination and, when CHECKSUM_CHECK_TCP is set,
 *     segments whose pseudo-header checksum is not zero;
 *   - advances p past the TCP header, converts the header fields to host byte
 *     order in place and derives the file-scope seqno, ackno, flags and tcplen;
 *   - searches tcp_active_pcbs, then tcp_tw_pcbs, then the listening PCBs for
 *     a matching connection. TIME-WAIT and LISTEN matches are handed to
 *     tcp_timewait_input() and tcp_listen_input(); an active match is moved
 *     to the front of its list, described in inseg and run through
 *     tcp_process();
 *   - after tcp_process(), unless it returned ERR_ABRT, either removes and
 *     frees the PCB (TF_RESET, after reporting ERR_RST through the error
 *     callback, or TF_CLOSED) or raises the sent, recv and EOF events;
 *   - answers a segment that matched no PCB with tcp_rst(), unless the
 *     segment itself carries RST.
 *
 * p:   pbuf holding the segment. Its payload is read as starting at the IP
 *      header, since the TCP header is looked up IPH_HL(iphdr) * 4 bytes in.
 * inp: network interface the segment arrived on; used for the broadcast test.
 *
 * NOTE(review): this listing does not show every line of the function (the
 * return type, the braces, the assignment of iphdr and several statements are
 * absent), so the summary covers only the code that is visible.
 */
86 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
87 * the segment between the PCBs and passes it on to tcp_process(), which implements
88 * the TCP finite state machine. This function is called by the IP layer (in
93 tcp_input(struct pbuf *p, struct netif *inp)
95 struct tcp_pcb *pcb, *prev;
96 struct tcp_pcb_listen *lpcb;
102 TCP_STATS_INC(tcp.recv);
103 snmp_inc_tcpinsegs();
106 tcphdr = (struct tcp_hdr *)((u8_t *)p->payload + IPH_HL(iphdr) * 4);
109 tcp_debug_print(tcphdr);
112 /* remove header from payload */
113 if (pbuf_header(p, -((s16_t)(IPH_HL(iphdr) * 4))) || (p->tot_len < sizeof(struct tcp_hdr))) {
114 /* drop short packets */
115 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%"U16_F" bytes) discarded\n", p->tot_len));
116 TCP_STATS_INC(tcp.lenerr);
117 TCP_STATS_INC(tcp.drop);
122 /* Don't even process incoming broadcasts/multicasts. */
123 if (ip_addr_isbroadcast(&(iphdr->dest), inp) ||
124 ip_addr_ismulticast(&(iphdr->dest))) {
125 snmp_inc_tcpinerrs();
130 #if CHECKSUM_CHECK_TCP
131 /* Verify TCP checksum. */
132 if (inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src),
133 (struct ip_addr *)&(iphdr->dest),
134 IP_PROTO_TCP, p->tot_len) != 0) {
135 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F"\n",
136 inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src), (struct ip_addr *)&(iphdr->dest),
137 IP_PROTO_TCP, p->tot_len)));
139 tcp_debug_print(tcphdr);
140 #endif /* TCP_DEBUG */
141 TCP_STATS_INC(tcp.chkerr);
142 TCP_STATS_INC(tcp.drop);
143 snmp_inc_tcpinerrs();
149 /* Move the payload pointer in the pbuf so that it points to the
150 TCP data instead of the TCP header. */
151 hdrlen = TCPH_HDRLEN(tcphdr);
152 pbuf_header(p, -(hdrlen * 4));
154 /* Convert fields in TCP header to host byte order. */
155 tcphdr->src = ntohs(tcphdr->src);
156 tcphdr->dest = ntohs(tcphdr->dest);
157 seqno = tcphdr->seqno = ntohl(tcphdr->seqno);
158 ackno = tcphdr->ackno = ntohl(tcphdr->ackno);
159 tcphdr->wnd = ntohs(tcphdr->wnd);
/* tcplen is the payload length plus one when SYN or FIN is set. */
161 flags = TCPH_FLAGS(tcphdr) & TCP_FLAGS;
162 tcplen = p->tot_len + ((flags & TCP_FIN || flags & TCP_SYN)? 1: 0);
164 /* Demultiplex an incoming segment. First, we check if it is destined
165 for an active connection. */
169 for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
170 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
171 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
172 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);
173 if (pcb->remote_port == tcphdr->src &&
174 pcb->local_port == tcphdr->dest &&
175 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
176 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
178 /* Move this PCB to the front of the list so that subsequent
179 lookups will be faster (we exploit locality in TCP segment
181 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
183 prev->next = pcb->next;
184 pcb->next = tcp_active_pcbs;
185 tcp_active_pcbs = pcb;
187 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
194 /* If it did not go to an active connection, we check the connections
195 in the TIME-WAIT state. */
196 for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
197 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
198 if (pcb->remote_port == tcphdr->src &&
199 pcb->local_port == tcphdr->dest &&
200 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
201 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
202 /* We don't really care enough to move this PCB to the front
203 of the list since we are not very likely to receive that
204 many segments for connections in TIME-WAIT. */
205 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
206 tcp_timewait_input(pcb);
212 /* Finally, if we still did not get a match, we check all PCBs that
213 are LISTENing for incoming connections. */
215 for(lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
216 if ((ip_addr_isany(&(lpcb->local_ip)) ||
217 ip_addr_cmp(&(lpcb->local_ip), &(iphdr->dest))) &&
218 lpcb->local_port == tcphdr->dest) {
219 /* Move this PCB to the front of the list so that subsequent
220 lookups will be faster (we exploit locality in TCP segment
223 ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
224 /* our successor is the remainder of the listening list */
225 lpcb->next = tcp_listen_pcbs.listen_pcbs;
226 /* put this listening pcb at the head of the listening list */
227 tcp_listen_pcbs.listen_pcbs = lpcb;
230 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
231 tcp_listen_input(lpcb);
235 prev = (struct tcp_pcb *)lpcb;
240 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
241 tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
242 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
243 #endif /* TCP_INPUT_DEBUG */
247 /* The incoming segment belongs to a connection. */
250 tcp_debug_print_state(pcb->state);
251 #endif /* TCP_DEBUG */
252 #endif /* TCP_INPUT_DEBUG */
254 /* Set up a tcp_seg structure. */
256 inseg.len = p->tot_len;
257 inseg.dataptr = p->payload;
259 inseg.tcphdr = tcphdr;
265 err = tcp_process(pcb);
266 tcp_input_pcb = NULL;
267 /* A return value of ERR_ABRT means that tcp_abort() was called
268 and that the pcb has been freed. If so, we don't do anything. */
269 if (err != ERR_ABRT) {
270 if (recv_flags & TF_RESET) {
271 /* TF_RESET means that the connection was reset by the other
272 end. We then call the error callback to inform the
273 application that the connection is dead before we
274 deallocate the PCB. */
275 TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ERR_RST);
276 tcp_pcb_remove(&tcp_active_pcbs, pcb);
277 memp_free(MEMP_TCP_PCB, pcb);
278 } else if (recv_flags & TF_CLOSED) {
279 /* The connection has been closed and we will deallocate the
281 tcp_pcb_remove(&tcp_active_pcbs, pcb);
282 memp_free(MEMP_TCP_PCB, pcb);
285 /* If the application has registered a "sent" function to be
286 called when new send buffer space is available, we call it
288 if (pcb->acked > 0) {
289 TCP_EVENT_SENT(pcb, pcb->acked, err);
292 if (recv_data != NULL) {
293 /* Notify application that data has been received. */
294 TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
297 /* If a FIN segment was received, we call the callback
298 function with a NULL buffer to indicate EOF. */
299 if (recv_flags & TF_GOT_FIN) {
300 TCP_EVENT_RECV(pcb, NULL, ERR_OK, err);
302 /* If there were no errors, we try to send something out. */
310 /* give up our reference to inseg.p */
318 tcp_debug_print_state(pcb->state);
319 #endif /* TCP_DEBUG */
320 #endif /* TCP_INPUT_DEBUG */
324 /* If no matching PCB was found, send a TCP RST (reset) to the
326 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
327 if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
328 TCP_STATS_INC(tcp.proterr);
329 TCP_STATS_INC(tcp.drop);
330 tcp_rst(ackno, seqno + tcplen,
331 &(iphdr->dest), &(iphdr->src),
332 tcphdr->dest, tcphdr->src);
337 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
338 PERF_STOP("tcp_input");
/* tcp_listen_input(pcb)
 *
 * Handles a segment that matched the listening PCB `pcb`.
 *   - ACK set: a reset is sent with tcp_rst().
 *   - otherwise, SYN set: a connection PCB is obtained from tcp_alloc() at
 *     the listener's priority and filled in from the segment and the listener
 *     (addresses, ports, SYN_RCVD state, rcv_nxt = seqno + 1, send window,
 *     callback argument, accept callback, inherited socket options). It is
 *     then registered on tcp_active_pcbs and a SYN|ACK carrying an MSS option
 *     is enqueued.
 *
 * Returns the result of tcp_output() on the SYN path.
 * NOTE(review): the return type, the check of the tcp_alloc() result and the
 * remaining return statements are not visible in this listing.
 */
341 /* tcp_listen_input():
343 * Called by tcp_input() when a segment arrives for a listening
348 tcp_listen_input(struct tcp_pcb_listen *pcb)
350 struct tcp_pcb *npcb;
353 /* In the LISTEN state, we check for incoming SYN segments,
354 create a new PCB, and respond with a SYN|ACK. */
355 if (flags & TCP_ACK) {
356 /* For incoming segments with the ACK flag set, respond with a
358 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
359 tcp_rst(ackno + 1, seqno + tcplen,
360 &(iphdr->dest), &(iphdr->src),
361 tcphdr->dest, tcphdr->src);
362 } else if (flags & TCP_SYN) {
363 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %"U16_F" -> %"U16_F".\n", tcphdr->src, tcphdr->dest));
364 npcb = tcp_alloc(pcb->prio);
365 /* If a new PCB could not be created (probably due to lack of memory),
366 we don't do anything, but rely on the sender to retransmit the
367 SYN at a time when we have more memory available. */
369 LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
370 TCP_STATS_INC(tcp.memerr);
373 /* Set up the new PCB. */
374 ip_addr_set(&(npcb->local_ip), &(iphdr->dest));
375 npcb->local_port = pcb->local_port;
376 ip_addr_set(&(npcb->remote_ip), &(iphdr->src));
377 npcb->remote_port = tcphdr->src;
378 npcb->state = SYN_RCVD;
379 npcb->rcv_nxt = seqno + 1;
380 npcb->snd_wnd = tcphdr->wnd;
381 npcb->ssthresh = npcb->snd_wnd;
382 npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */
383 npcb->callback_arg = pcb->callback_arg;
384 #if LWIP_CALLBACK_API
385 npcb->accept = pcb->accept;
386 #endif /* LWIP_CALLBACK_API */
387 /* inherit socket options */
388 npcb->so_options = pcb->so_options & (SOF_DEBUG|SOF_DONTROUTE|SOF_KEEPALIVE|SOF_OOBINLINE|SOF_LINGER);
389 /* Register the new PCB so that we can begin receiving segments
391 TCP_REG(&tcp_active_pcbs, npcb);
393 /* Parse any options in the SYN. */
396 snmp_inc_tcppassiveopens();
398 /* Build an MSS option. */
399 optdata = htonl(((u32_t)2 << 24) |
401 (((u32_t)npcb->mss / 256) << 8) |
403 /* Send a SYN|ACK together with the MSS option. */
404 tcp_enqueue(npcb, NULL, 0, TCP_SYN | TCP_ACK, 0, (u8_t *)&optdata, 4);
405 return tcp_output(npcb);
/* tcp_timewait_input(pcb)
 *
 * Handles a segment that matched a PCB on the TIME-WAIT list. If the segment
 * ends beyond pcb->rcv_nxt (seqno + tcplen, compared with TCP_SEQ_GT),
 * rcv_nxt is advanced to the end of the segment. The function returns the
 * result of tcp_output(pcb).
 * NOTE(review): the return type and the lines between the rcv_nxt update and
 * the return are not visible in this listing.
 */
410 /* tcp_timewait_input():
412 * Called by tcp_input() when a segment arrives for a connection in
417 tcp_timewait_input(struct tcp_pcb *pcb)
419 if (TCP_SEQ_GT(seqno + tcplen, pcb->rcv_nxt)) {
420 pcb->rcv_nxt = seqno + tcplen;
425 return tcp_output(pcb);
/* tcp_process(pcb)
 *
 * Runs the segment described by the file-scope variables (flags, seqno,
 * ackno, tcplen, inseg) through the state machine of `pcb`.
 *
 * RST handling comes first. A reset is accepted in SYN_SENT when ackno equals
 * pcb->snd_nxt, and otherwise when seqno lies between pcb->rcv_nxt and
 * pcb->rcv_nxt + pcb->rcv_wnd. An accepted reset sets recv_flags to TF_RESET
 * and clears TF_ACK_DELAY; a rejected one is only logged.
 *
 * The visible code then refreshes pcb->tmr from tcp_ticks and switches on
 * pcb->state. The transitions that can be read off the assignments are:
 *   - a SYN|ACK with the expected ackno moves the PCB to ESTABLISHED and
 *     raises TCP_EVENT_CONNECTED;
 *   - an ACK between lastack + 1 and snd_nxt moves it to ESTABLISHED and
 *     raises TCP_EVENT_ACCEPT;
 *   - a FIN accepted in sequence by tcp_receive() leads to CLOSE_WAIT;
 *   - the closing branches move on to FIN_WAIT_2, CLOSING or TIME_WAIT
 *     (moving the PCB from tcp_active_pcbs to tcp_tw_pcbs), and the last
 *     branch sets recv_flags to TF_CLOSED.
 *
 * NOTE(review): the case labels, braces and return statements of this
 * function are not visible in this listing, so which state each branch
 * belongs to is inferred from the debug strings and assignments.
 */
430 * Implements the TCP state machine. Called by tcp_input. In some
431 * states tcp_receive() is called to receive data. The tcp_seg
432 * argument will be freed by the caller (tcp_input()) unless the
433 * recv_data pointer in the pcb is set.
437 tcp_process(struct tcp_pcb *pcb)
439 struct tcp_seg *rseg;
446 /* Process incoming RST segments. */
447 if (flags & TCP_RST) {
448 /* First, determine if the reset is acceptable. */
449 if (pcb->state == SYN_SENT) {
450 if (ackno == pcb->snd_nxt) {
454 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
455 TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
457 if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt+pcb->rcv_wnd)) {
463 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
464 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
465 recv_flags = TF_RESET;
466 pcb->flags &= ~TF_ACK_DELAY;
469 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
470 seqno, pcb->rcv_nxt));
471 LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
472 seqno, pcb->rcv_nxt));
477 /* Update the PCB (in)activity timer. */
478 pcb->tmr = tcp_ticks;
481 /* Do different things depending on the TCP state. */
482 switch (pcb->state) {
484 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %"U32_F" pcb->snd_nxt %"U32_F" unacked %"U32_F"\n", ackno,
485 pcb->snd_nxt, ntohl(pcb->unacked->tcphdr->seqno)));
486 /* received SYN ACK with expected sequence number? */
487 if ((flags & TCP_ACK) && (flags & TCP_SYN)
488 && ackno == ntohl(pcb->unacked->tcphdr->seqno) + 1) {
490 pcb->rcv_nxt = seqno + 1;
491 pcb->lastack = ackno;
492 pcb->snd_wnd = tcphdr->wnd;
493 pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
494 pcb->state = ESTABLISHED;
495 pcb->cwnd = pcb->mss;
497 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %"U16_F"\n", (u16_t)pcb->snd_queuelen));
499 pcb->unacked = rseg->next;
502 /* Parse any options in the SYNACK. */
505 /* Call the user specified function to call when successfully
507 TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
510 /* received ACK? possibly a half-open connection */
511 else if (flags & TCP_ACK) {
512 /* send a RST to bring the other side in a non-synchronized state. */
513 tcp_rst(ackno, seqno + tcplen, &(iphdr->dest), &(iphdr->src),
514 tcphdr->dest, tcphdr->src);
518 if (flags & TCP_ACK &&
519 !(flags & TCP_RST)) {
520 /* expected ACK number? */
521 if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)) {
522 pcb->state = ESTABLISHED;
523 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
524 #if LWIP_CALLBACK_API
525 LWIP_ASSERT("pcb->accept != NULL", pcb->accept != NULL);
527 /* Call the accept function. */
528 TCP_EVENT_ACCEPT(pcb, ERR_OK, err);
530 /* If the accept function returns with an error, we abort
535 /* If there was any data contained within this ACK,
536 * we'd better pass it on to the application as well. */
538 pcb->cwnd = pcb->mss;
540 /* incorrect ACK number */
543 tcp_rst(ackno, seqno + tcplen, &(iphdr->dest), &(iphdr->src),
544 tcphdr->dest, tcphdr->src);
551 accepted_inseq = tcp_receive(pcb);
552 if ((flags & TCP_FIN) && accepted_inseq) { /* passive close */
554 pcb->state = CLOSE_WAIT;
559 if (flags & TCP_FIN) {
560 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
561 LWIP_DEBUGF(TCP_DEBUG,
562 ("TCP connection closed %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
565 TCP_RMV(&tcp_active_pcbs, pcb);
566 pcb->state = TIME_WAIT;
567 TCP_REG(&tcp_tw_pcbs, pcb);
570 pcb->state = CLOSING;
572 } else if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
573 pcb->state = FIN_WAIT_2;
578 if (flags & TCP_FIN) {
579 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
582 TCP_RMV(&tcp_active_pcbs, pcb);
583 pcb->state = TIME_WAIT;
584 TCP_REG(&tcp_tw_pcbs, pcb);
589 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
590 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
593 TCP_RMV(&tcp_active_pcbs, pcb);
594 pcb->state = TIME_WAIT;
595 TCP_REG(&tcp_tw_pcbs, pcb);
600 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
601 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
603 recv_flags = TF_CLOSED;
614 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
615 * data, and if so frees the memory of the buffered data. Next, it places the
616 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
617 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
618 * it has been removed from the buffer.
620 * If the incoming segment constitutes an ACK for a segment that was used for RTT
621 * estimation, the RTT is estimated here as well.
/* tcp_receive(pcb)
 *
 * Processes the ACK and the data of the current segment for `pcb`.
 *
 * ACK part (flags & TCP_ACK):
 *   - updates snd_wnd, snd_wl1 and snd_wl2 when the segment carries a newer
 *     window;
 *   - for an ACK equal to pcb->lastack that leaves the right window edge
 *     unchanged, performs fast retransmit once pcb->dupacks >= 3 and
 *     inflates cwnd on further duplicates;
 *   - for an ACK of new data (between lastack + 1 and snd_max), clears
 *     TF_INFR, recomputes rto, credits snd_buf, grows cwnd and drops the
 *     acknowledged segments from pcb->unacked and pcb->unsent;
 *   - takes an RTT sample when the ACK passes pcb->rtseq while pcb->rttest
 *     is set.
 *
 * Data part:
 *   - trims the front of a segment that starts before pcb->rcv_nxt;
 *   - for a segment starting exactly at rcv_nxt, advances rcv_nxt, shrinks
 *     rcv_wnd, notes a FIN in recv_flags and appends segments from
 *     pcb->ooseq that have become contiguous;
 *   - otherwise inserts the segment into the seqno-ordered pcb->ooseq list,
 *     trimming overlaps with its neighbours.
 *
 * Returns accepted_inseq, which starts out as 0.
 *
 * NOTE(review): pcb->snd_nxt is reloaded from the unsent queue with htonl(),
 * whereas the reads of unacked and unsent seqno elsewhere in this function
 * use ntohl(); presumably the same swap, but ntohl() would state the intent.
 * NOTE(review): many lines of this function (braces, else branches, several
 * assignments, the return type) are not visible in this listing.
 */
627 tcp_receive(struct tcp_pcb *pcb)
629 struct tcp_seg *next;
631 struct tcp_seg *prev, *cseg;
636 u32_t right_wnd_edge;
638 u8_t accepted_inseq = 0;
640 if (flags & TCP_ACK) {
/* Remember the right window edge as it was before this segment; the
   duplicate-ACK test below compares the updated edge against it. */
641 right_wnd_edge = pcb->snd_wnd + pcb->snd_wl1;
644 if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
645 (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
646 (pcb->snd_wl2 == ackno && tcphdr->wnd > pcb->snd_wnd)) {
647 pcb->snd_wnd = tcphdr->wnd;
648 pcb->snd_wl1 = seqno;
649 pcb->snd_wl2 = ackno;
650 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %"U32_F"\n", pcb->snd_wnd));
653 if (pcb->snd_wnd != tcphdr->wnd) {
654 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: no window update lastack %"U32_F" snd_max %"U32_F" ackno %"U32_F" wl1 %"U32_F" seqno %"U32_F" wl2 %"U32_F"\n",
655 pcb->lastack, pcb->snd_max, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
657 #endif /* TCP_WND_DEBUG */
660 if (pcb->lastack == ackno) {
663 if (pcb->snd_wl1 + pcb->snd_wnd == right_wnd_edge){
665 if (pcb->dupacks >= 3 && pcb->unacked != NULL) {
666 if (!(pcb->flags & TF_INFR)) {
667 /* This is fast retransmit. Retransmit the first unacked segment. */
668 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupacks %"U16_F" (%"U32_F"), fast retransmit %"U32_F"\n",
669 (u16_t)pcb->dupacks, pcb->lastack,
670 ntohl(pcb->unacked->tcphdr->seqno)));
672 /* Set ssthresh to max (FlightSize / 2, 2*SMSS) */
673 /*pcb->ssthresh = LWIP_MAX((pcb->snd_max -
676 /* Set ssthresh to half of the minimum of the current cwnd and the advertised window */
677 if (pcb->cwnd > pcb->snd_wnd)
678 pcb->ssthresh = pcb->snd_wnd / 2;
680 pcb->ssthresh = pcb->cwnd / 2;
682 pcb->cwnd = pcb->ssthresh + 3 * pcb->mss;
683 pcb->flags |= TF_INFR;
685 /* Inflate the congestion window, but not if it means that
686 the value overflows. */
687 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
688 pcb->cwnd += pcb->mss;
693 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupack averted %"U32_F" %"U32_F"\n",
694 pcb->snd_wl1 + pcb->snd_wnd, right_wnd_edge));
697 /*if (TCP_SEQ_LT(pcb->lastack, ackno) &&
698 TCP_SEQ_LEQ(ackno, pcb->snd_max)) { */
699 if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_max)){
700 /* We come here when the ACK acknowledges new data. */
702 /* Reset the "IN Fast Retransmit" flag, since we are no longer
703 in fast retransmit. Also reset the congestion window to the
704 slow start threshold. */
705 if (pcb->flags & TF_INFR) {
706 pcb->flags &= ~TF_INFR;
707 pcb->cwnd = pcb->ssthresh;
710 /* Reset the number of retransmissions. */
713 /* Reset the retransmission time-out. */
714 pcb->rto = (pcb->sa >> 3) + pcb->sv;
716 /* Update the send buffer space. */
717 pcb->acked = ackno - pcb->lastack;
719 pcb->snd_buf += pcb->acked;
721 /* Reset the fast retransmit variables. */
723 pcb->lastack = ackno;
725 /* Update the congestion control variables (cwnd and
727 if (pcb->state >= ESTABLISHED) {
728 if (pcb->cwnd < pcb->ssthresh) {
729 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
730 pcb->cwnd += pcb->mss;
732 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"U16_F"\n", pcb->cwnd));
734 u16_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
735 if (new_cwnd > pcb->cwnd) {
736 pcb->cwnd = new_cwnd;
738 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %"U16_F"\n", pcb->cwnd));
741 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %"U32_F", unacked->seqno %"U32_F":%"U32_F"\n",
743 pcb->unacked != NULL?
744 ntohl(pcb->unacked->tcphdr->seqno): 0,
745 pcb->unacked != NULL?
746 ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked): 0));
748 /* Remove segment from the unacknowledged list if the incoming
749 ACK acknowledges them. */
750 while (pcb->unacked != NULL &&
751 TCP_SEQ_LEQ(ntohl(pcb->unacked->tcphdr->seqno) +
752 TCP_TCPLEN(pcb->unacked), ackno)) {
753 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->unacked\n",
754 ntohl(pcb->unacked->tcphdr->seqno),
755 ntohl(pcb->unacked->tcphdr->seqno) +
756 TCP_TCPLEN(pcb->unacked)));
759 pcb->unacked = pcb->unacked->next;
761 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"U16_F" ... ", (u16_t)pcb->snd_queuelen));
762 pcb->snd_queuelen -= pbuf_clen(next->p);
765 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"U16_F" (after freeing unacked)\n", (u16_t)pcb->snd_queuelen));
766 if (pcb->snd_queuelen != 0) {
767 LWIP_ASSERT("tcp_receive: valid queue length", pcb->unacked != NULL ||
768 pcb->unsent != NULL);
774 /* We go through the ->unsent list to see if any of the segments
775 on the list are acknowledged by the ACK. This may seem
776 strange since an "unsent" segment shouldn't be acked. The
777 rationale is that lwIP puts all outstanding segments on the
778 ->unsent list after a retransmission, so these segments may
779 in fact have been sent once. */
780 while (pcb->unsent != NULL &&
781 /*TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), ackno) &&
782 TCP_SEQ_LEQ(ackno, pcb->snd_max)*/
783 TCP_SEQ_BETWEEN(ackno, ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), pcb->snd_max)
785 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->unsent\n",
786 ntohl(pcb->unsent->tcphdr->seqno), ntohl(pcb->unsent->tcphdr->seqno) +
787 TCP_TCPLEN(pcb->unsent)));
790 pcb->unsent = pcb->unsent->next;
791 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"U16_F" ... ", (u16_t)pcb->snd_queuelen));
792 pcb->snd_queuelen -= pbuf_clen(next->p);
794 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"U16_F" (after freeing unsent)\n", (u16_t)pcb->snd_queuelen));
795 if (pcb->snd_queuelen != 0) {
796 LWIP_ASSERT("tcp_receive: valid queue length",
797 pcb->unacked != NULL || pcb->unsent != NULL);
800 if (pcb->unsent != NULL) {
801 pcb->snd_nxt = htonl(pcb->unsent->tcphdr->seqno);
804 /* End of ACK for new data processing. */
806 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %"U32_F" rtseq %"U32_F" ackno %"U32_F"\n",
807 pcb->rttest, pcb->rtseq, ackno));
809 /* RTT estimation calculations. This is done by checking if the
810 incoming segment acknowledges the segment we use to take a
811 round-trip time measurement. */
812 if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
813 m = tcp_ticks - pcb->rttest;
815 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: experienced rtt %"U16_F" ticks (%"U16_F" msec).\n",
816 m, m * TCP_SLOW_INTERVAL));
818 /* This is taken directly from VJs original code in his paper */
819 m = m - (pcb->sa >> 3);
824 m = m - (pcb->sv >> 2);
826 pcb->rto = (pcb->sa >> 3) + pcb->sv;
828 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %"U16_F" (%"U16_F" miliseconds)\n",
829 pcb->rto, pcb->rto * TCP_SLOW_INTERVAL));
835 /* If the incoming segment contains data, we must process it
838 /* This code basically does three things:
840 +) If the incoming segment contains data that is the next
841 in-sequence data, this data is passed to the application. This
842 might involve trimming the first edge of the data. The rcv_nxt
843 variable and the advertised window are adjusted.
845 +) If the incoming segment has data that is above the next
846 sequence number expected (->rcv_nxt), the segment is placed on
847 the ->ooseq queue. This is done by finding the appropriate
848 place in the ->ooseq queue (which is ordered by sequence
849 number) and trim the segment in both ends if needed. An
850 immediate ACK is sent to indicate that we received an
851 out-of-sequence segment.
853 +) Finally, we check if the first segment on the ->ooseq queue
854 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
855 rcv_nxt > ooseq->seqno, we must trim the first edge of the
856 segment on ->ooseq before we adjust rcv_nxt. The data in the
857 segments that are now on sequence are chained onto the
858 incoming segment so that we only need to call the application
862 /* First, we check if we must trim the first edge. We have to do
863 this if the sequence number of the incoming segment is less
864 than rcv_nxt, and the sequence number plus the length of the
865 segment is larger than rcv_nxt. */
866 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
867 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
868 if (TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno + 1, seqno + tcplen - 1)){
869 /* Trimming the first edge is done by pushing the payload
870 pointer in the pbuf downwards. This is somewhat tricky since
871 we do not want to discard the full contents of the pbuf up to
872 the new starting point of the data since we have to keep the
873 TCP header which is present in the first pbuf in the chain.
875 What is done is really quite a nasty hack: the first pbuf in
876 the pbuf chain is pointed to by inseg.p. Since we need to be
877 able to deallocate the whole pbuf, we cannot change this
878 inseg.p pointer to point to any of the later pbufs in the
879 chain. Instead, we point the ->payload pointer in the first
880 pbuf to data in one of the later pbufs. We also set the
881 inseg.data pointer to point to the right place. This way, the
882 ->p pointer will still point to the first pbuf, but the
883 ->p->payload pointer will point to data in another pbuf.
885 After we are done with adjusting the pbuf pointers we must
886 adjust the ->data pointer in the seg and the segment
889 off = pcb->rcv_nxt - seqno;
891 LWIP_ASSERT("inseg.p != NULL", inseg.p);
892 if (inseg.p->len < off) {
893 new_tot_len = inseg.p->tot_len - off;
894 while (p->len < off) {
896 /* KJM following line changed (with addition of new_tot_len var)
898 inseg.p->tot_len -= p->len; */
899 p->tot_len = new_tot_len;
903 pbuf_header(p, -off);
905 pbuf_header(inseg.p, -off);
907 /* KJM following line changed to use p->payload rather than inseg->p->payload
909 inseg.dataptr = p->payload;
910 inseg.len -= pcb->rcv_nxt - seqno;
911 inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
914 if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
915 /* the whole segment is < rcv_nxt */
916 /* must be a duplicate of a packet that has already been correctly handled */
918 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %"U32_F"\n", seqno));
923 /* The sequence number must be within the window (above rcv_nxt
924 and below rcv_nxt + rcv_wnd) in order to be further
926 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
927 TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
928 if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)){
929 if (pcb->rcv_nxt == seqno) {
931 /* The incoming segment is the next in sequence. We check if
932 we have to trim the end of the segment and update rcv_nxt
933 and pass the data to the application. */
935 if (pcb->ooseq != NULL &&
936 TCP_SEQ_LEQ(pcb->ooseq->tcphdr->seqno, seqno + inseg.len)) {
937 /* We have to trim the second edge of the incoming
939 inseg.len = pcb->ooseq->tcphdr->seqno - seqno;
940 pbuf_realloc(inseg.p, inseg.len);
942 #endif /* TCP_QUEUE_OOSEQ */
944 tcplen = TCP_TCPLEN(&inseg);
946 /* First received FIN will be ACKed +1, on any successive (duplicate)
947 * FINs we are already in CLOSE_WAIT and have already done +1.
949 if (pcb->state != CLOSE_WAIT) {
950 pcb->rcv_nxt += tcplen;
953 /* Update the receiver's (our) window. */
954 if (pcb->rcv_wnd < tcplen) {
957 pcb->rcv_wnd -= tcplen;
960 /* If there is data in the segment, we make preparations to
961 pass this up to the application. The ->recv_data variable
962 is used for holding the pbuf that goes to the
963 application. The code for reassembling out-of-sequence data
964 chains its data on this pbuf as well.
966 If the segment was a FIN, we set the TF_GOT_FIN flag that will
967 be used to indicate to the application that the remote side has
968 closed its end of the connection. */
969 if (inseg.p->tot_len > 0) {
971 /* Since this pbuf now is the responsibility of the
972 application, we delete our reference to it so that we won't
973 (mistakenly) deallocate it. */
976 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
977 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
978 recv_flags = TF_GOT_FIN;
982 /* We now check if we have segments on the ->ooseq queue that
983 are now in sequence. */
984 while (pcb->ooseq != NULL &&
985 pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
988 seqno = pcb->ooseq->tcphdr->seqno;
990 pcb->rcv_nxt += TCP_TCPLEN(cseg);
991 if (pcb->rcv_wnd < TCP_TCPLEN(cseg)) {
994 pcb->rcv_wnd -= TCP_TCPLEN(cseg);
996 if (cseg->p->tot_len > 0) {
997 /* Chain this pbuf onto the pbuf that we will pass to
1000 pbuf_cat(recv_data, cseg->p);
1002 recv_data = cseg->p;
1006 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
1007 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
1008 recv_flags = TF_GOT_FIN;
1009 if (pcb->state == ESTABLISHED) { /* force passive close or we can move to active close */
1010 pcb->state = CLOSE_WAIT;
1015 pcb->ooseq = cseg->next;
1018 #endif /* TCP_QUEUE_OOSEQ */
1021 /* Acknowledge the segment(s). */
1025 /* We get here if the incoming segment is out-of-sequence. */
1028 /* We queue the segment on the ->ooseq queue. */
1029 if (pcb->ooseq == NULL) {
1030 pcb->ooseq = tcp_seg_copy(&inseg);
1032 /* If the queue is not empty, we walk through the queue and
1033 try to find a place where the sequence number of the
1034 incoming segment is between the sequence numbers of the
1035 previous and the next segment on the ->ooseq queue. That is
1036 the place where we put the incoming segment. If needed, we
1037 trim the second edges of the previous and the incoming
1038 segment so that it will fit into the sequence.
1040 If the incoming segment has the same sequence number as a
1041 segment on the ->ooseq queue, we discard the segment that
1042 contains less data. */
1045 for(next = pcb->ooseq; next != NULL; next = next->next) {
1046 if (seqno == next->tcphdr->seqno) {
1047 /* The sequence number of the incoming segment is the
1048 same as the sequence number of the segment on
1049 ->ooseq. We check the lengths to see which one to
1051 if (inseg.len > next->len) {
1052 /* The incoming segment is larger than the old
1053 segment. We replace the old segment with the new
1055 cseg = tcp_seg_copy(&inseg);
1057 cseg->next = next->next;
1066 /* Either the lengths are the same or the incoming
1067 segment was smaller than the old one; in either
1068 case, we ditch the incoming segment. */
1073 if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1074 /* The sequence number of the incoming segment is lower
1075 than the sequence number of the first segment on the
1076 queue. We put the incoming segment first on the
1079 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1080 /* We need to trim the incoming segment. */
1081 inseg.len = next->tcphdr->seqno - seqno;
1082 pbuf_realloc(inseg.p, inseg.len);
1084 cseg = tcp_seg_copy(&inseg);
1092 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1093 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1094 if(TCP_SEQ_BETWEEN(seqno, prev->tcphdr->seqno+1, next->tcphdr->seqno-1)){
1095 /* The sequence number of the incoming segment is in
1096 between the sequence numbers of the previous and
1097 the next segment on ->ooseq. We trim and insert the
1098 incoming segment and trim the previous segment, if
1100 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1101 /* We need to trim the incoming segment. */
1102 inseg.len = next->tcphdr->seqno - seqno;
1103 pbuf_realloc(inseg.p, inseg.len);
1106 cseg = tcp_seg_copy(&inseg);
1110 if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
1111 /* We need to trim the prev segment. */
1112 prev->len = seqno - prev->tcphdr->seqno;
1113 pbuf_realloc(prev->p, prev->len);
1118 /* If the "next" segment is the last segment on the
1119 ooseq queue, we add the incoming segment to the end
1121 if (next->next == NULL &&
1122 TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
1123 next->next = tcp_seg_copy(&inseg);
1124 if (next->next != NULL) {
1125 if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
1126 /* We need to trim the last segment. */
1127 next->len = seqno - next->tcphdr->seqno;
1128 pbuf_realloc(next->p, next->len);
1137 #endif /* TCP_QUEUE_OOSEQ */
1141 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1142 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1143 if(!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd-1)){
1148 /* Segments with length 0 are taken care of here. Segments that
1149 fall out of the window are ACKed. */
1150 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1151 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1152 if(!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd-1)){
1156 return accepted_inseq;
/* tcp_parseopt(pcb)
 *
 * Walks the option bytes that follow the fixed header of the current segment
 * (file-scope tcphdr). On finding an MSS option (kind 0x02, length 0x04) it
 * stores the announced value in pcb->mss, capped at TCP_MSS. Going by the
 * comments below, parsing stops at the end-of-options marker, after the MSS
 * option, or at an option whose length byte is zero.
 *
 * NOTE(review): the visible code reads opts[c + 1] .. opts[c + 3] without a
 * visible check that these bytes still lie inside the option area; confirm
 * against the complete source.
 * NOTE(review): the return type, local declarations and several statements
 * are not visible in this listing.
 */
1162 * Parses the options contained in the incoming segment. (Code taken
1163 * from uIP with only small changes.)
1168 tcp_parseopt(struct tcp_pcb *pcb)
/* The option bytes begin TCP_HLEN bytes into the TCP header. */
1174 opts = (u8_t *)tcphdr + TCP_HLEN;
1176 /* Parse the TCP MSS option, if present. */
1177 if(TCPH_HDRLEN(tcphdr) > 0x5) {
/* TCPH_HDRLEN() is a count of 4-byte words (tcp_input() multiplies it by 4),
   so the loop bound turns the words beyond the first five into bytes. */
1178 for(c = 0; c < (TCPH_HDRLEN(tcphdr) - 5) << 2 ;) {
1181 /* End of options. */
1183 } else if (opt == 0x01) {
1186 } else if (opt == 0x02 &&
1187 opts[c + 1] == 0x04) {
1188 /* An MSS option with the right option length. */
1189 mss = (opts[c + 2] << 8) | opts[c + 3];
1190 pcb->mss = mss > TCP_MSS? TCP_MSS: mss;
1192 /* And we are done processing options. */
1195 if (opts[c + 1] == 0) {
1196 /* If the length field is zero, the options are malformed
1197 and we don't process them further. */
1200 /* All other options have a length field, so that we easily
1201 can skip past them. */
1207 #endif /* LWIP_TCP */